1 Global Setup

1.1 Path

before you start: .rs.restartR()

GitClonePath <- "/Users/admin/Desktop/ElbeEstuarineZander"
setwd(GitClonePath)

1.2 Packages

# R package management tools such as packrat or renv. These tools allow you to create isolated environments with specific versions of R and installed packages, ensuring reproducibility and avoiding conflicts with other packages installed in your system.
#install.packages("packrat")
#packrat::init()
#packrat::snapshot()
#packrat::restore()

# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install(c(
#   "DESeq2",
#   "AnnotationDbi", 
#   "AnnotationHub", 
#   "Biobase", 
#   "GSEABase", 
#   "DECIPHER", 
#   "ComplexHeatmap", 
#   "Biostrings", 
#   "BiocParallel", 
#   "BiocNeighbors", 
#   "BiocFileCache", 
#   "BiocBaseUtils", 
#   "GO.db", 
#   "GOSemSim", 
#   "ShortRead", 
#   "SummarizedExperiment", 
#   "TreeSummarizedExperiment", 
#   "phyloseq", 
#   "tidyverse",
#   "plyr",
#   "dplyr",
#   "DESeq2",
#   "cowplot",
#   "ggrepel",
#   "PCAtools",
#   "ComplexHeatmap",
#   "WGCNA",
#   "dada2",
#   "ShortRead",
#   "phyloseq",
#   "factoextra",
#   "microbiome", #clr transformation
#   "mia",
#   "clusterProfiler", 
#   "GenomeInfoDbData"
#   ), force =T)

Most of the packages needed here are stored in the packrat::restore

1.2 Functions

1.4 Input Files

#########################
#Read DADA2 Output files#
taxa  <-readRDS(file.path(path_Input_16S, "SL_04.01.2024_Taxa_Species.rds"))
seqtab.nochim <-  readRDS(file.path(path_Input_16S, "SL_04.01.2024_seqtab.nochim.rds"))
ReadNum1 <-  read.csv2(file.path(path_Input_16S, "Batch1-31.07.23_470_Track_reads_through_pipeline.csv"))
ReadNum2 <-  read.csv2(file.path(path_Input_16S, "Batch1-Reseq-31.07.23_470_Track_reads_through_pipeline.csv"))
ReadNum3 <-  read.csv2(file.path(path_Input_16S, "Batch2-15.09.23_470_Track_reads_through_pipeline.csv"))

########################################################################
#Import samplefile and filter for samples with successful 16S sequencing
SAMDF<- read.table(file=file.path(path_Input_RNA, "SL_samplefile_04.01.2024.csv") ,sep=";",dec = ".")
# Get all IDs present
SAMDF16S<-dplyr::filter(SAMDF, DNA16S == "yes") 
rownames(SAMDF16S) <- SAMDF16S$SampleID

1.5 Tutorials

#-

2 Phyloseq

#Create a phyloseq object from Dada2 Output
library(tidyverse)
taxa  <-readRDS(file.path(path_Input_16S, "SL_04.01.2024_Taxa_Species.rds"))
seqtab.nochim <-  readRDS(file.path(path_Input_16S, "SL_04.01.2024_seqtab.nochim.rds"))

taxa<-as.matrix(taxa)
library(phyloseq)
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), 
tax_table(taxa))

dna <- Biostrings::DNAStringSet(taxa_names(ps))
names(dna) <- taxa_names(ps)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps)))
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 42523 taxa and 28 samples ]
## tax_table()   Taxonomy Table:    [ 42523 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 42523 reference sequences ]
head(sample_names(ps))
## [1] "SLSU21BBEB7" "SLSU21MGEB7" "SLSU21MLEB9" "SLSU21SSEB9" "WFSU21MLFL" 
## [6] "WFSU21SSFL"
ReadNum1 <-  read.csv2(file.path(path_Input_16S, "Batch1-31.07.23_470_Track_reads_through_pipeline.csv"))
ReadNum2 <-  read.csv2(file.path(path_Input_16S, "Batch1-Reseq-31.07.23_470_Track_reads_through_pipeline.csv"))
ReadNum3 <-  read.csv2(file.path(path_Input_16S, "Batch2-15.09.23_470_Track_reads_through_pipeline.csv"))

ReadNum <- rbind(ReadNum1, ReadNum2, ReadNum3)
ReadNum<-ReadNum[,c("X", "input")]
colnames(ReadNum)<-c("SampleID", "Input")
ReadNum <- ReadNum %>%
  dplyr::group_by(SampleID) %>%
  dplyr::summarise(Input = sum(Input))
nonchim <- as.data.frame(rowSums(otu_table(ps)))
names(nonchim) <- "nonchim"
ReadNum<- dplyr::left_join(rownames_to_column(nonchim), ReadNum, by=c("rowname" = "SampleID"))
colnames(ReadNum)<-c("SampleID", "nonchim", "Input")
rownames(ReadNum) <- ReadNum$SampleID

OTU <- otu_table(ps)
TAX <- tax_table(ps) 
REFSEQ <- refseq(ps)
ps <- phyloseq(otu_table(OTU, taxa_are_rows=FALSE), 
               sample_data(ReadNum), tax_table(TAX), refseq(REFSEQ))

rowSums(otu_table(ps))
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9  WFSU21MLFL  WFSU21SSFL 
##       15073       34566       29958       26008       28393        7889 
##     Elbe915     Elbe917 SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 
##       15128       13641       17695       26679        7946       10390 
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 
##       73914       26965       16244       22180       18040       12653 
## SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2 
##       12523       39283       32031       33032       16014       15087 
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       33062       11534       21561       14757
sample_names(ps)
##  [1] "SLSU21BBEB7" "SLSU21MGEB7" "SLSU21MLEB9" "SLSU21SSEB9" "WFSU21MLFL" 
##  [6] "WFSU21SSFL"  "Elbe915"     "Elbe917"     "SLSU21BBEB1" "SLSU21BBEB2"
## [11] "SLSU21BBEB4" "SLSU21BBEB6" "SLSU21BBEB9" "SLSU21MGEB1" "SLSU21MGEB2"
## [16] "SLSU21MGEB3" "SLSU21MGEB4" "SLSU21MGEB5" "SLSU21MLEB1" "SLSU21MLEB2"
## [21] "SLSU21MLEB5" "SLSU21MLEB6" "SLSU21MLEB7" "SLSU21SSEB2" "SLSU21SSEB3"
## [26] "SLSU21SSEB5" "SLSU21SSEB6" "SLSU21SSEB7"
sample_names(ps)[sample_names(ps) == "Elbe915"] <- "WFSU21MGEB"
sample_names(ps)[sample_names(ps) == "Elbe917"] <- "WFSU21BBEB"

rowSums(otu_table(ps))
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9  WFSU21MLFL  WFSU21SSFL 
##       15073       34566       29958       26008       28393        7889 
##  WFSU21MGEB  WFSU21BBEB SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 
##       15128       13641       17695       26679        7946       10390 
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 
##       73914       26965       16244       22180       18040       12653 
## SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2 
##       12523       39283       32031       33032       16014       15087 
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       33062       11534       21561       14757
sample_names(ps)
##  [1] "SLSU21BBEB7" "SLSU21MGEB7" "SLSU21MLEB9" "SLSU21SSEB9" "WFSU21MLFL" 
##  [6] "WFSU21SSFL"  "WFSU21MGEB"  "WFSU21BBEB"  "SLSU21BBEB1" "SLSU21BBEB2"
## [11] "SLSU21BBEB4" "SLSU21BBEB6" "SLSU21BBEB9" "SLSU21MGEB1" "SLSU21MGEB2"
## [16] "SLSU21MGEB3" "SLSU21MGEB4" "SLSU21MGEB5" "SLSU21MLEB1" "SLSU21MLEB2"
## [21] "SLSU21MLEB5" "SLSU21MLEB6" "SLSU21MLEB7" "SLSU21SSEB2" "SLSU21SSEB3"
## [26] "SLSU21SSEB5" "SLSU21SSEB6" "SLSU21SSEB7"
saveRDS(ps, file.path(path_Output_16S, "SL_ps_16S_merge_17.01.24.rds"))

-

3 Analysis

3.1 Setup Analysis

#Date of the Analysis for save-names#
Date <- "16.01.2024"
set.seed(123)
Species    <- "SL"
Tissue     <- "Gill"
Year       <- "2021"
Season     <- "Summer"
Type       <- "16S"
alpha      <- 0.05
OperatingSystem <- "Windows"
prefix <- "SSU-"
#Differential Abundance Testing
#####################
variable = Comparisons = "Replicate2"
#####################
VariableOrder<-c("SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML", "WF")
LocOrder=c("Medem Grund", "Brunsbuettel", "Schwarztonnen Sand","Muehlenberger Loch")
save_name <- paste(Species,Year,Season,sep = "_")
#Check your Output: 
paste0(file.path(path_Output_16S, "DAT_"), save_name, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_16S/DAT_SL_2021_Summer.RData"
SL_SSU_Samples<-c(
"WFSU21MGEB", "WFSU21BBEB", "WFSU21SSFL", "WFSU21MLFL",
"SLSU21MGEB1","SLSU21MGEB2","SLSU21MGEB3","SLSU21MGEB4","SLSU21MGEB5","SLSU21MGEB7",
"SLSU21BBEB1","SLSU21BBEB2","SLSU21BBEB4","SLSU21BBEB6","SLSU21BBEB7","SLSU21BBEB9",
"SLSU21SSEB2","SLSU21SSEB3","SLSU21SSEB5","SLSU21SSEB6","SLSU21SSEB7","SLSU21SSEB9",
"SLSU21MLEB1","SLSU21MLEB2","SLSU21MLEB5","SLSU21MLEB6","SLSU21MLEB7","SLSU21MLEB9")

SAMDF16S$Reps <- sub("SLSU21", "", SAMDF16S$Replicate2)  
    RepOrder <-c("MG-713","BB-692", "SS-665", "ML-633", "WF")
    SAMDF16S$Reps<-ifelse(SAMDF16S$Reps == "MG", "MG-713",
                        ifelse(SAMDF16S$Reps == "BB", "BB-692",
                               ifelse(SAMDF16S$Reps == "SS", "SS-665",
                                      ifelse(SAMDF16S$Reps == "ML", "ML-633", "WF"))))

SAMDF16S$LOC<-ifelse(SAMDF16S$Loc == "Medem Grund", "MG-713",
                      ifelse(SAMDF16S$Loc == "Brunsbuettel", "BB-692",
                          ifelse(SAMDF16S$Loc == "Schwarztonnen Sand", "SS-665",
                              ifelse(SAMDF16S$Loc == "Muehlenberger Loch", "ML-633", NA))))

3.1.1 Create Color Code

phylum_colors <- c("Actinobacteriota"  = "red", 
          "Cyanobacteria"     = "green", 
          "Bacteroidota"      = "darkgreen",
          "Proteobacteria"    = "darkorange", 
          #"Patescibacteria"  = "pink", 
          "Acidobacteriota "  = "pink",
          "Planctomycetota"   = "purple", 
          "Verrucomicrobiota" = "blue", 
          "Chloroflexota"     = "cyan",
          "Deinococcota"      = "#FFFF00",
          "Gemmatimonadota"   = "#8F7C00",  
          "Other" = "grey20")

generate_shades <- function(base_color, num_variations) {
    color_ramp <- colorRampPalette(c(base_color, "white"))
    # Generate a vector of colors
    colors <- color_ramp(num_variations+1)
    # Create a data frame with the colors
    color_palette <- colors[1:(length(colors) - 1)]
    return(color_palette)}

3.2 Filter ASVs

#https://github.com/joey711/phyloseq/issues/574
require(phyloseq)
require(dada2)
require(tidyverse)
head(as.data.frame(rowSums(otu_table(ps))))
##             rowSums(otu_table(ps))
## SLSU21BBEB7                  15073
## SLSU21MGEB7                  34566
## SLSU21MLEB9                  29958
## SLSU21SSEB9                  26008
## WFSU21MLFL                   28393
## WFSU21SSFL                    7889
pslistraw <- list("ps" = ps)
for (i in names(pslistraw)[grepl("ps", names(pslistraw))]){
    ps <- pslistraw[[i]]
    g <- names(pslistraw[i])
    A = data.frame(sample_data(ps))
    A<-A %>% mutate(Run =case_when(
    A$SampleID %in% c(ReadNum2$X) ~ "Batch1_Run1merge",
    A$SampleID %in% c(ReadNum3$X) ~ "Batch2"))
    #PHY <-phy_tree(ps)
    OTU <- otu_table(ps, taxa_are_rows=FALSE)
    TAX <- tax_table(ps)
    REFSEQ <- refseq(ps)
    pslistraw[[i]] <- phyloseq(otu_table(OTU, taxa_are_rows=FALSE), 
               sample_data(A), tax_table(TAX), refseq(REFSEQ)) #, phy_tree(PHY)
}

ps_WF <-prune_samples(sample_names(pslistraw$ps)[grepl("WF", sample_names(pslistraw$ps))], pslistraw$ps)
ps_SL_21 <- prune_samples(sample_names(pslistraw$ps)[grepl("SLSU21MG|SLSU21BB|SLSU21SS|SLSU21ML", sample_names(pslistraw$ps))], pslistraw$ps)
ps_SLWF_21 <- prune_samples(sample_names(pslistraw$ps)[grepl("SLSU21MG|SLSU21BB|SLSU21SS|SLSU21ML|WFSU21", sample_names(pslistraw$ps))], pslistraw$ps)

pslistraw<- list("ps_WF" = ps_WF,"ps_SL_21" = ps_SL_21, "ps_SLWF_21" = ps_SLWF_21)

###########
#Save Data#
###########
saveRDS(pslistraw, file.path(path_Output_16S, paste(paste(save_name,"ps_16S_merge_pslistraw", Date, sep="_"), ".rds", sep="")))

#########################
#Adding real sample data#
#########################
for (i in names(pslistraw)[grepl("ps_WF|ps_SL_21|ps_SLWF_21", names(pslistraw))]){
    ps_SAMDF <- pslistraw[[i]]
    g<- i; print(g)

    A<-SAMDF16S[rownames(SAMDF16S) %in% rownames(sample_data(ps_SAMDF)),]
    B <-dplyr::left_join(A, sample_data(ps_SAMDF))
    rownames(B) <- B$SampleID
    B <-phyloseq::sample_data(B)

    #PHY <-phy_tree(ps)
    OTU <- phyloseq::otu_table(ps_SAMDF, taxa_are_rows=FALSE)
    TAX <- phyloseq::tax_table(ps_SAMDF)
    REFSEQ <- refseq(ps_SAMDF)
    pslistraw[[i]] <- phyloseq(OTU, TAX, B, REFSEQ) #, phy_tree(PHY)
}
## [1] "ps_WF"
## [1] "ps_SL_21"
## [1] "ps_SLWF_21"
applied_filter <- 0.005
print(paste("The Filtering Abundance is", applied_filter, "%"))
## [1] "The Filtering Abundance is 0.005 %"
pslist <- pslistraw
  for (i in names(pslist)[grepl("ps", names(pslist))]){
    require(cowplot)
    require(phyloseq)
    require(ggplot2)
    tryCatch({
    ps <- pslist[[i]]
    g<- names(pslist[i])
    ###### Pre-Filtering Step 1: Remove all not bacteria
    table(tax_table(ps)[, "Phylum"], exclude = NULL)
    ps_Filter <- ps %>%
    prune_taxa(taxa_sums(.) > 0, .) # prune OTUs that are not present in at least one sample
    ps_Filter1 <- ps_Filter %>% #Filter everything that is not bacteria
    subset_taxa(
    Kingdom == "Bacteria" &
    Family  != "mitochondria" &
    Class   != "Chloroplast")

    A<-as.data.frame(rowSums(otu_table(ps)))
    B<-as.data.frame(rowSums(otu_table(ps_Filter1)))
    #100 - B / A  #We loose reads by the merge steps
    print("We loose reads by Chloroplast & Mitochondria removal")
    print((B - A)/A*100)
    
    # WFAUT21TWEB_batch2                    -33.7557466
    # WFSU21BBEB_batch2                     -21.4285714
    # OESP22MLFL3                            -5.6184486
    # GCSU21SSEB8_batch2                     -4.3464177

    ###### Pre-Filtering Step 2: Remove all uncharacterized
    ps_Filter2 <- subset_taxa(ps_Filter1, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized"))
    ps_Filter2
    table(tax_table(ps_Filter2)[, "Phylum"], exclude = NULL)

    ###### Pre-Filtering Step 3: Glom to species level
    #ps_Filter_glom = tax_glom(ps_Filter2, "Species", NArm = FALSE) 
    #head(tax_table(ps_Filter_glom))
    #following http://mixomics.org/mixmc/mixmc-preprocessing/
    #Arumugam et al., (2011)

    
    low.count.removal <- function(
                        data, # OTU count df of size n (sample) x p (OTU)
                        percent=0.005 # cutoff chosen
                        ) {
    keep.otu = which(colSums(data)*100/(sum(colSums(data))) > percent)
    data.filter = data[,keep.otu]
    return(list(data.filter = data.filter, keep.otu = keep.otu))}

    #######################################################
    #Plot consequence of filtering from Strand et al, 2021#
    #######################################################
    frac_zero <- c()
    all_sum   <- c()
    num_otu   <- c()
    seqp <- seq(0,0.1,0.0001)
    for(i in seqp){
      result.filter = low.count.removal(otu_table(ps_Filter2), percent=i)
      length(result.filter$keep.otu)
      otu.f = result.filter$data.filter
      frac_zero <- c(frac_zero, sum(otu.f == 00)/ (ncol(otu.f)*nrow(otu.f)))
      all_sum <- c(all_sum, sum(otu.f))
      num_otu <- c(num_otu, ncol(otu.f))
    }
    names(frac_zero) <- seqp
    names(all_sum) <- seqp

    # Get all_sum and num_otu to a fraction of the total
    mas <- max(all_sum)
    mno <- max(num_otu)

    all_sum %>% (function(x){x/max(x)}) -> all_sum
    num_otu %>% (function(x){x/max(x)}) -> num_otu
    data.frame(filter = seqp, 
           Percent.zeros   = frac_zero*100, 
           Total.abundance = all_sum*100, 
           Number.of.OTUs  = num_otu*100) %>% 
    tidyr::gather(key = "Type", value = "percent.of.total", -filter) %>%
    ggplot() + 
    geom_line(mapping = aes(x = filter, y = percent.of.total, col = Type)) +
    geom_vline(xintercept = applied_filter, alpha = 0.5, color = "red", linetype="dashed")+
    ylab("Percent of total") +
    xlab(paste("Filter at", applied_filter)) +
    theme(legend.title = element_blank()) -> FilterZerosPlot

    plot(FilterZerosPlot)
    ########################
    # remove low count OTUs#
    ########################
    result.filter <- low.count.removal(otu_table(ps_Filter2), percent=applied_filter) 
    data.filter <- result.filter$data.filter
    length(result.filter$keep.otu) # check how many OTUs remain

    ps_Filter3 <- prune_taxa(names(result.filter$keep.otu), ps_Filter2)

    tail(t(otu_table(ps_Filter3)))
    table(rowSums(t(otu_table(ps_Filter3))))

    ###### Pre-Filtering Step 4: Add best taxonomy to ASV
    ps_Filter_ASV_besttax<- add_besthit(ps_Filter3, sep=":")

    ###### Excluding samples with low sequencing depth
    #Check sample depths
    print(sample_sums(ps_Filter))
    print(sample_sums(ps_Filter_ASV_besttax))

    ###### Pre-Filtering Step 5: Remove low frequency Samples
    print(paste("Samples removed for low frequency below 1000 seqs in;", g, sep = ""))
    print(which(!rowSums(otu_table(ps_Filter_ASV_besttax)) > 1000))
    #WFSU21BBFL WFSU21MGFL WFSU22OSEB WFWI22MLEB WFWI22SSEB WFSU21BBEB WFSU21MGEB WFSU21MLEB WFSU21SSEB 
    #   156        157        161        167        169        305        306        307        308 
    head(rowSums(otu_table(ps_Filter_ASV_besttax)))
    library(phyloseq)
    ps_Filter_ASV_besttax_Samples <- prune_samples(rowSums(otu_table(ps_Filter_ASV_besttax)) > 1000,
                                                            ps_Filter_ASV_besttax) 
    
    ps_Filter_ASV_besttax_Samples <- ps_Filter_ASV_besttax_Samples %>% prune_taxa(taxa_sums(.) > 0, .) 
    #ps_Filter_ASV_besttax_Samples <- filter_taxa(ps_Filter_ASV_besttax_Samples, function (x) {sum(x > 0) > 1},
    #                                 prune=TRUE) 
    saveRDS(ps_Filter_ASV_besttax_Samples, file.path(path_Output_16S,paste(g, paste(paste("Filter_ASV_besttax_16S", Date, sep="_"), ".rds", sep=""), sep="_")))
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }
## [1] "We loose reads by Chloroplast & Mitochondria removal"
##            rowSums(otu_table(ps_Filter1))
## WFSU21MLFL                      -31.10626
## WFSU21SSFL                      -34.46571
## WFSU21MGEB                      -33.55368
## WFSU21BBEB                      -31.55194

## WFSU21MLFL WFSU21SSFL WFSU21MGEB WFSU21BBEB 
##      28393       7889      15128      13641 
## WFSU21MLFL WFSU21SSFL WFSU21MGEB WFSU21BBEB 
##      18087       4766       9359       8686 
## [1] "Samples removed for low frequency below 1000 seqs in;ps_WF"
## named integer(0)
## [1] "We loose reads by Chloroplast & Mitochondria removal"
##             rowSums(otu_table(ps_Filter1))
## SLSU21BBEB7                     -5.9112320
## SLSU21MGEB7                    -10.7851646
## SLSU21MLEB9                     -4.0957340
## SLSU21SSEB9                     -4.4024915
## SLSU21BBEB1                     -2.4187624
## SLSU21BBEB2                     -4.5541437
## SLSU21BBEB4                     -4.8074503
## SLSU21BBEB6                     -8.7584216
## SLSU21BBEB9                     -2.1457369
## SLSU21MGEB1                     -0.2410532
## SLSU21MGEB2                     -9.4619552
## SLSU21MGEB3                     -3.7015329
## SLSU21MGEB4                     -5.7483370
## SLSU21MGEB5                     -6.0697068
## SLSU21MLEB1                     -7.0909526
## SLSU21MLEB2                     -2.8816536
## SLSU21MLEB5                     -2.5881178
## SLSU21MLEB6                     -1.3078227
## SLSU21MLEB7                     -6.8814787
## SLSU21SSEB2                     -3.1020084
## SLSU21SSEB3                     -5.9675761
## SLSU21SSEB5                     -1.9594243
## SLSU21SSEB6                     -1.6093873
## SLSU21SSEB7                     -3.0832825

## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9 SLSU21BBEB1 SLSU21BBEB2 
##       15073       34566       29958       26008       17695       26679 
## SLSU21BBEB4 SLSU21BBEB6 SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 
##        7946       10390       73914       26965       16244       22180 
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 
##       18040       12653       12523       39283       32031       33032 
## SLSU21MLEB7 SLSU21SSEB2 SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       16014       15087       33062       11534       21561       14757 
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9 SLSU21BBEB1 SLSU21BBEB2 
##       13205       27182       26798       22744       16628       23860 
## SLSU21BBEB4 SLSU21BBEB6 SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 
##        7093        8510       70708       26718       12815       20421 
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 
##       15644       10820       10278       36259       29257       31789 
## SLSU21MLEB7 SLSU21SSEB2 SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       13051       13839       28182       10695       20514       13621 
## [1] "Samples removed for low frequency below 1000 seqs in;ps_SL_21"
## named integer(0)
## [1] "We loose reads by Chloroplast & Mitochondria removal"
##             rowSums(otu_table(ps_Filter1))
## SLSU21BBEB7                     -5.9112320
## SLSU21MGEB7                    -10.7851646
## SLSU21MLEB9                     -4.0957340
## SLSU21SSEB9                     -4.4024915
## WFSU21MLFL                     -31.1062586
## WFSU21SSFL                     -34.4657118
## WFSU21MGEB                     -33.5536753
## WFSU21BBEB                     -31.5519390
## SLSU21BBEB1                     -2.4187624
## SLSU21BBEB2                     -4.5541437
## SLSU21BBEB4                     -4.8074503
## SLSU21BBEB6                     -8.7584216
## SLSU21BBEB9                     -2.1457369
## SLSU21MGEB1                     -0.2410532
## SLSU21MGEB2                     -9.4619552
## SLSU21MGEB3                     -3.7015329
## SLSU21MGEB4                     -5.7483370
## SLSU21MGEB5                     -6.0697068
## SLSU21MLEB1                     -7.0909526
## SLSU21MLEB2                     -2.8816536
## SLSU21MLEB5                     -2.5881178
## SLSU21MLEB6                     -1.3078227
## SLSU21MLEB7                     -6.8814787
## SLSU21SSEB2                     -3.1020084
## SLSU21SSEB3                     -5.9675761
## SLSU21SSEB5                     -1.9594243
## SLSU21SSEB6                     -1.6093873
## SLSU21SSEB7                     -3.0832825

## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9  WFSU21MLFL  WFSU21SSFL 
##       15073       34566       29958       26008       28393        7889 
##  WFSU21MGEB  WFSU21BBEB SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 
##       15128       13641       17695       26679        7946       10390 
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 
##       73914       26965       16244       22180       18040       12653 
## SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2 
##       12523       39283       32031       33032       16014       15087 
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       33062       11534       21561       14757 
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9  WFSU21MLFL  WFSU21SSFL 
##       13253       27402       27002       22804       14242        3800 
##  WFSU21MGEB  WFSU21BBEB SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 
##        7607        7132       16681       23886        7140        8584 
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 
##       70882       26696       12928       20411       15751       10865 
## SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2 
##       10429       36343       29354       31833       13214       13836 
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       28380       10734       20554       13616 
## [1] "Samples removed for low frequency below 1000 seqs in;ps_SLWF_21"
## named integer(0)
ps_WF      <- readRDS(file.path(path_Output_16S, paste("ps_WF", paste(paste("Filter_ASV_besttax_16S", Date, sep="_"), ".rds", sep=""), sep="_")))
ps_SL_21   <- readRDS(file.path(path_Output_16S, paste("ps_SL_21", paste(paste("Filter_ASV_besttax_16S", Date, sep="_"), ".rds", sep=""), sep="_")))
ps_SLWF_21 <- readRDS(file.path(path_Output_16S, paste("ps_SLWF_21", paste(paste("Filter_ASV_besttax_16S", Date, sep="_"), ".rds", sep=""), sep="_")))
                      
pslist<- list("ps_WF" = ps_WF, "ps_SL_21" = ps_SL_21, "ps_SLWF_21" = ps_SLWF_21)

for (i in names(pslist)[grepl("ps", names(pslist))]){
  g <- paste(i)
  a <- length(pslist)
  ps <- pslist[[i]]
  ps_clr <- microbiome::transform(ps, "clr")
  pslist[[a+1]] <- ps_clr
  names(pslist)[[a+1]] <- paste("clr", sub("ps", "\\1", g), sep="")}

for (i in names(pslist)[grepl("clr", names(pslist))]){
  g <- paste(i)
  a <- length(pslist)
  clr <- pslist[[i]]
  TSE<-mia::makeTreeSummarizedExperimentFromPhyloseq(clr)
  pslist[[a+1]] <- TSE
  names(pslist)[[a+1]] <- paste("TSEc.l.r", sub("clr", "\\1", g), sep="")}

###########
#Save Data#
###########
saveRDS(pslist, file.path(path_Output_16S, paste(paste(save_name, "ps_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))

3.2.1 Read SSU-data

pslist <- readRDS(file.path(path_Output_16S, paste(paste(save_name,"ps_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))
pslistraw <- readRDS(file.path(path_Output_16S, paste(paste(save_name,"ps_16S_merge_pslistraw", Date, sep="_"), ".rds", sep="")))

-

4 Alpha Diversity

4.1 Observed Diversity Plots

##########################
#Observed Diversity Plots#
##########################
for (i in names(pslist)[grepl("ps_SLWF_21", names(pslist))]){
  require(plyr); require(ggrepel); require(cowplot); require(phyloseq)
  #if (OperatingSystem == "Mac" ) {quartz() }
  tryCatch({
  g       <- paste(i); print(i) 
  #gg      <- sub('ps', '', g)
  A<- plot_richness(pslist[[i]], x = "SampleID", measures = c("Observed")) + # Shannon
  geom_bar(aes(fill = sample_data(pslist[[i]])$Replicates), stat = "identity", position = "stack") + #fill = Phylum
  scale_x_discrete(limits = SL_SSU_Samples) +
  #scale_x_discrete(breaks=factor(sample_data(pslist[[i]])$Replicates, levels=SLOrder)) +
  scale_fill_manual(values = col.Palette$SL) +
  theme(axis.text.x.bottom = element_text(size=rel(0.8),angle = 45, vjust = 1, hjust = 1), 
        legend.title = element_text( size = 6),
        legend.text = element_text(size = 6),
        legend.key.size = unit(0.4, 'cm'),)+
  labs(x = element_blank(), y = "Alpha Diversity Measure (non normalized)")
  title <- ggdraw() + draw_label_themeRK(paste(g), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRK(paste("Paired End 16S",sep = " "), element = "plot.subtitle",
                                            x = 0.05, hjust = 0,   
  vjust = 1)
  prow <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
  A<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0.04, 0.05, 0.98))
  ggsave(A, filename = paste(paste(save_name, Type, paste(g, "Observed-AlphaDiversity", sep="_"), Date, sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8,
  height = 8)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
## [1] "ps_SLWF_21"
A

rowSums(otu_table(pslist$ps_SL_21))
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9 SLSU21BBEB1 SLSU21BBEB2 
##       13205       27182       26798       22744       16628       23860 
## SLSU21BBEB4 SLSU21BBEB6 SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 
##        7093        8510       70708       26718       12815       20421 
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 
##       15644       10820       10278       36259       29257       31789 
## SLSU21MLEB7 SLSU21SSEB2 SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 
##       13051       13839       28182       10695       20514       13621
sum(otu_table(pslist$ps_SL_21))
## [1] 510631
sum(otu_table(pslist$ps_WF))
## [1] 40898

4.2 Species Accumulation

- Figure S1A
require(vegan)
spacc_raw_SL_21 <- specaccum(otu_table(pslistraw$ps_SL_21), method = "random", permutations = 100)
spacc_SL_21     <- specaccum(otu_table(pslist$ps_SL_21), method = "random", permutations = 100)

# plot(spacc, ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue")
# boxplot(spacc, col="yellow", add=TRUE, pch="+")

#creating a dataframe for ggplot2
data_SL_Raw <- data.frame(Specimen=spacc_raw_SL_21$sites, Richness=spacc_raw_SL_21$richness, SD=spacc_raw_SL_21$sd)
A<- ggplot() +
  geom_point(data=data_SL_Raw, aes(x=Specimen, y=Richness)) +
  geom_line(data=data_SL_Raw, aes(x=Specimen, y=Richness)) +
  geom_ribbon(data=data_SL_Raw ,aes(x=Specimen, ymin=(Richness-2*SD),ymax=(Richness+2*SD)),alpha=0.2) +
    atheme +
    theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=10), 
        legend.text=element_text(size=10)) +
    theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
Species_Accumulation_raw <- cowplot::plot_grid(A, labels = c(""), ncol = 1)

data_SL <- data.frame(Specimen=spacc_SL_21$sites, Richness=spacc_SL_21$richness, SD=spacc_SL_21$sd)
B<- ggplot() +
  geom_point(data=data_SL, aes(x=Specimen, y=Richness)) +
  geom_line(data=data_SL, aes(x=Specimen, y=Richness)) +
  geom_ribbon(data=data_SL,aes(x=Specimen, ymin=(Richness-2*SD),ymax=(Richness+2*SD)),alpha=0.2) +
      atheme +
    theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=10), 
        legend.text=element_text(size=10)) +
    theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
Species_Accumulation_Filtered <- cowplot::plot_grid(B, labels = c(""), ncol = 1)

cowplot::plot_grid(Species_Accumulation_raw, Species_Accumulation_Filtered, labels = c("A", "B"), ncol = 1) -> part_1
ggsave(part_1, filename = paste(paste(save_name, Type, "SpeciesAccumulation", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 6,height = 9)
part_1

4.3 Alpha Diversity Barplot

- Figure 4A
##########################
#Barplot  with color code#
##########################
TaxLevel <- "Genus"
for (i in names(pslist)[grepl("ps_SLWF_21", names(pslist))]){
  require(plyr); require(ggrepel); require(cowplot); require(phyloseq)
     
    g       <- paste(i); print(g)
    #gg      <- gsub(paste0(".*", "(SL|WF)", ".*"), "\\1", i)
    gg      <- "SL"
    FILENAME    <- paste(paste(save_name, "Alpha_BarPlot_Publication", TaxLevel, gg, sep="_"), ".png", sep="")
    
    ################################
    #Create Relative Abundance Data#
    ################################
    ps_alpha_barplot <- pslist[[i]] %>%
    tax_glom(taxrank =  TaxLevel)   %>%  
    transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
    psmelt() %>%                                         # Melt to long format
    filter(Abundance > 1) %>%                       # Filter out low abundance taxa
    arrange(Genus)        %>%                                # Sort data frame alphabetically by phylum
    dplyr::arrange(desc(Abundance))
 
    SLOrder <-c("WF", "SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
    ps_alpha_barplot$Reps <- sub("SLSU21", "", ps_alpha_barplot$Replicate2)  
    RepOrder <-c("WF", "MG-713","BB-692", "SS-665", "ML-633")
    ps_alpha_barplot$Reps<-ifelse(ps_alpha_barplot$Reps == "MG", "MG-713",
                        ifelse(ps_alpha_barplot$Reps == "BB", "BB-692",
                               ifelse(ps_alpha_barplot$Reps == "SS", "SS-665",
                                      ifelse(ps_alpha_barplot$Reps == "ML", "ML-633", "WF"))))

    ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)  
    ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
    
    ############################
    #Create TotalAbundance Data#
    ############################
    phylum_abundance <- ps_alpha_barplot %>%
      dplyr::group_by(.data[[TaxLevel]]) %>%
      dplyr::summarise(TotalAbundance = sum(Abundance))
      ordered_levels <- phylum_abundance %>%
      dplyr::arrange(desc(TotalAbundance)) %>%
      pull(.data[[TaxLevel]])
      ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
      ps_alpha_barplot$SampleID <- factor(ps_alpha_barplot$SampleID, levels = SL_SSU_Samples)
      
    ##########################
    #Subset for Interest ASVs#
    ##########################
    df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,]
    
    ######################
    #Define Phylum Colors#
    ######################
    phylum_colors <- c(
          "Actinobacteriota"  = "red", 
          "Cyanobacteria"     = "green", 
          "Bacteroidota"      = "darkgreen",
          "Proteobacteria1"    = "darkorange2", 
          "Proteobacteria2"    = "#663300",
          #"Patescibacteria"  = "pink", 
          "Acidobacteriota "  = "#FF00CC",
          "Planctomycetota"   = "purple", 
          "Verrucomicrobiota" = "blue", 
          "Chloroflexota"     = "cyan",
          "Deinococcota"      = "#FFFF00",
          "Gemmatimonadota"   = "#8F7C00",  
          "Other" = "grey20")
       generate_shades <- function(base_color, num_variations) {
        color_ramp <- colorRampPalette(c(base_color, "white"))
        # Generate a vector of colors
        colors <- color_ramp(num_variations+1)
        # Create a data frame with the colors
        color_palette <- colors[1:(length(colors) - 1)]
      return(color_palette)}

    ##################################################
    #Create Color Dataframe for Taxa from PhylaColors#
    ##################################################
    taxonomy_df <- df[c("Phylum", "Taxa")]
    #taxonomy_df$ASV <- sub(".*:", "", taxonomy_df$OTU)
    taxonomy_df <- taxonomy_df[!duplicated(taxonomy_df$Taxa),]
    #############################################
    #Order to ordered_levels from Total Abudance#
    order_index <- match(ordered_levels, taxonomy_df$Taxa)
    taxonomy_df <- taxonomy_df[order_index, ]
    print(head(taxonomy_df[c("Phylum", "Taxa")]))
    
    taxonomy_df$Phylum2 <- ifelse(taxonomy_df$Phylum == "Proteobacteria",
              paste0("Proteobacteria", ave(seq_along(taxonomy_df$Phylum), 
              taxonomy_df$Phylum, FUN = function(x) ifelse(seq_along(x) %% 2 == 0, 2, 1))),
                               taxonomy_df$Phylum)
    
    unique_phyla <- unique(taxonomy_df$Phylum2)
      # Assign base colors to unique phyla
      #phylum_colors <- setNames(base_colors[1:length(unique_phyla)], unique_phyla)
      # Print the mapping of phyla to colors
      phylum_cols<- as.data.frame(phylum_colors)
      phylum_cols$Phyla <- rownames(phylum_cols)

    colored_taxonomy <- data.frame(
      Taxa = taxonomy_df$Taxa,
      Phylum2 = taxonomy_df$Phylum2,
      Color = character(length(taxonomy_df$Taxa)),  # Initialize an empty character vector
      stringsAsFactors = FALSE)
  
  
    # Generate shades and assign colors to each taxon
    for (ii in seq_along(unique_phyla)) {
      phylum <- unique_phyla[ii]
      
      if (phylum %in% names(phylum_colors)) {
         base_color <- phylum_colors[[phylum]]
        } else {
        base_color <- phylum_colors[["Other"]]
        }
      taxon_indices <- taxonomy_df$Phylum2 == phylum
      num_variations <- sum(taxon_indices)
      shades <- generate_shades(base_color, pmin(num_variations, 5))
      
      #colored_taxonomy$Color[taxon_indices] <- shades 
      colored_taxonomy$Color[which(taxon_indices)[1:pmin(num_variations, 5)]]<- shades
    }
    
    colored_taxonomy$Color <- gsub("^$", NA, trimws(colored_taxonomy$Color))
    colored_taxonomy$Color <- replace_na(colored_taxonomy$Color, "grey50")
  
    ##################
    # #Some adjustments#
    colored_taxonomy[colored_taxonomy$Taxa == "Polynucleobacter",]$Color <- "#FF6600"
     colored_taxonomy[colored_taxonomy$Taxa == "Photobacterium",]$Color <- "darkorange3"
    # colored_taxonomy[colored_taxonomy$Taxa == "Vibrio",]$Color <- "#FF9966"
    # colored_taxonomy[colored_taxonomy$Taxa == "Citrobacter",]$Color <- "#FFCC00"
    # colored_taxonomy[colored_taxonomy$Taxa == "Enterobacter",]$Color <-"#993300"
     colored_taxonomy[colored_taxonomy$Taxa == "Acinetobacter",]$Color <-"#FF9933"
    #colored_taxonomy[colored_taxonomy$Taxa == "Psychrobacter",]$Color <-"#FF9933"
    # 
      Alpha_Diversity_df <- left_join(df, colored_taxonomy)
    color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)

 levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% ordered_levels[1:34]] <-
      "Other"

        p <- ggplot(Alpha_Diversity_df, 
        aes(x = SampleID, y = Abundance, fill = factor(Taxa))) + 
        geom_bar(stat = "identity") +
        facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x") +
        atheme2 +
        ylab("Relative Abundance [%] \n") + xlab("") + 
        labs(fill="") +
        scale_fill_manual(values = color_mapping) +
      
        #ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
        #inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
      #awhite + 
        theme(strip.text.y = element_text(angle = 0))  +
        #theme(axis.text.x=element_blank(),
        #axis.text.y=element_blank(),
        #axis.title.y.left = element_blank(),
        #axis.ticks.y =element_blank() 
        #) +
        theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=9), 
        legend.text=element_text(size=9), 
        axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
        strip.text.y.left = element_text(size = 9,face = "bold"), 
        axis.title.y.left = element_text(size=12,face = "bold"))
      p
      g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(c(col.Palette[[gg]][length(col.Palette[[gg]])], col.Palette[[gg]][-length(col.Palette[[gg]])]), 0.8)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
    Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
    Alpha_Diversity_BarPlot <- plot_grid(Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
    ggsave(Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 12, height = 6)
}
## [1] "ps_SLWF_21"
##            Phylum             Taxa
## 3    Bacteroidota  Elizabethkingia
## 2  Proteobacteria Polynucleobacter
## 33 Proteobacteria     Enterobacter
## 1  Proteobacteria   Photobacterium
## 35 Proteobacteria      Citrobacter
## 40   Bacteroidota Chryseobacterium
Alpha_Diversity_BarPlot

4.4 Core-Microbiome

Some thoughts from: McMurtrie, J., Alathari, S., Chaput, D. L., Bass, D., Ghambi, C., Nagoli, J., … & Tyler, C. R. (2021). Relationships between pond water and tilapia skin microbiomes in aquaculture ponds in Malawi. bioRxiv.McMurtrie, J., Alathari, S., Chaput, D. L., Bass, D., Ghambi, C., Nagoli, J., … & Tyler, C. R. (2021). Relationships between pond water and tilapia skin microbiomes in aquaculture ponds in Malawi. bioRxiv.

Core microbiome analysis seeks to find taxa conserved between samples above a defined prevalence threshold. In this case we are interested in finding the core MB of the Fish Swab community which is present in fish across different Locations sites. Core calculations will be performed with the microbiome package. We will then visualise these core taxa as a heatmap using pheatmap. This will also reveal abundances across both Loc and fish samples, indicating if the fish core taxa are in high abundance in the Loc water and therefore potentially non-residents. Final visualisations for the finished figure requires some manual editing in Inkscape.

R Setup Libraries

Import phyloseq objects Weight up in choosing what taxa level to study. The full ASV set is large and requires a low core threshold as there is a reasonable degree of variation in ASVs e.g. likely different strains. However, in the core analysis we are most interested in the major types of organisms appearing to be common in either system. For instance we could have three individual ASVs of the same genus, each a key photosynthetic but only one is dominant per pond site, collectively though they are occupying the same niche. This is obviously a generalisation and not applicable to all scenarios. Most core studies I have seen group at genus level. I did explore the use of tip_gloming instead to use a phylogenetic informed grouping level, rather than arbitrary taxonomy, but this compromises resolution of taxonomic classifications that I am not willing to accept.

require(phyloseq)
ps_SL_21_rel <- microbiome::transform(pslist$ps_SL_21, "compositional")

#Initial core computing
#Setting a detection threshold of 0.0001% (within samples) and prevelance threshold of 80% (across samples).
SL_core_taxa_standard <- microbiome::core_members(ps_SL_21_rel, detection = 0.0001, prevalence = 90/100)
ps_SL_21_rel_core <- microbiome::core(ps_SL_21_rel, detection = 0.0001, prevalence = .9)
core_taxa <- microbiome::taxa(ps_SL_21_rel_core)
tax_mat <- tax_table(ps_SL_21_rel_core)
tax_df <- as.data.frame(tax_mat)
tax_df$ASV <- rownames(tax_df)
# select taxonomy of only those OTUs that are core memebers based on the thresholds that were used.
core_taxa_class <- dplyr::filter(tax_df, rownames(tax_df) %in% core_taxa)
knitr::kable(head(core_taxa_class))
Kingdom Phylum Class Order Family Genus Species ASV
ASV1:Elizabethkingia Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Elizabethkingia NA ASV1:Elizabethkingia
ASV2:Citrobacter Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Citrobacter NA ASV2:Citrobacter
ASV3:Enterobacteriaceae Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae NA NA ASV3:Enterobacteriaceae
ASV4:Enterobacteriaceae Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae NA NA ASV4:Enterobacteriaceae
ASV5:Enterobacter Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae Enterobacter NA ASV5:Enterobacter
ASV6:Enterobacteriaceae Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae NA NA ASV6:Enterobacteriaceae
ps_SL_21_rel_gen <- microbiome::aggregate_taxa(ps_SL_21_rel_core, "Genus")
# Check if any taxa with no genus classification. aggregate_taxa will merge all unclassified to Unknown
any(taxa_names(ps_SL_21_rel_gen) == "Unknown")
## [1] TRUE
# Remove Unknown
ps_SL_21_rel_gen <- subset_taxa(ps_SL_21_rel_gen, Genus!="Unknown")
library(RColorBrewer)
prevalences <- seq(.05, 1, .05)
detections <- round(10^seq(log10(1e-5), log10(.2), length = 10), 3)

p1 <- microbiome::plot_core(ps_SL_21_rel_gen, 
                plot.type = "heatmap", 
                colours = rev(brewer.pal(5, "RdBu")),
                prevalences = prevalences, 
                detections = detections, min.prevalence = .8) +
    xlab("Detection Threshold (Relative Abundance (%))")
p1 <- p1 + theme_bw() + ylab("ASVs")
p1

pslist$Core <- core_taxa_class
4.4.1 Plot Core
- Figure S1B
######################################
#Barplot of Core Taxa with color code#
######################################
GroupOfInterest <- SL_core_taxa_standard
TaxLevel <- "Genus"
for (i in names(pslist)[grepl("ps_SLWF_21", names(pslist))]){
  require(plyr); require(ggrepel); require(cowplot); require(phyloseq)
     
    g       <- paste(i); print(g)
    #gg      <- gsub(paste0(".*", "(SL|WF)", ".*"), "\\1", i)
    gg      <- "SL"
    FILENAME    <- paste(paste(save_name, "Alpha_CoreTaxa_Publication", TaxLevel, gg, sep="_"), ".png", sep="")
    
    ################################
    #Create Relative Abundance Data#
    ################################
    ps_alpha_barplot <- pslist[[i]] %>%
    tax_glom(taxrank =  TaxLevel)   %>%  
    transform_sample_counts(function(x) {x/sum(x)*100}) %>% # Transform to rel. abundance
    psmelt() %>%                                         # Melt to long format
    #filter(Abundance > 1) %>%                       # Filter out low abundance taxa
    arrange(Genus)        %>%                                # Sort data frame alphabetically by phylum
    dplyr::arrange(desc(Abundance))
 
    SLOrder <-c("WF", "SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
    ps_alpha_barplot$Reps <- sub("SLSU21", "", ps_alpha_barplot$Replicate2)  
    RepOrder <-c("WF", "MG-713","BB-692", "SS-665", "ML-633")
    ps_alpha_barplot$Reps<-ifelse(ps_alpha_barplot$Reps == "MG", "MG-713",
                        ifelse(ps_alpha_barplot$Reps == "BB", "BB-692",
                               ifelse(ps_alpha_barplot$Reps == "SS", "SS-665",
                                      ifelse(ps_alpha_barplot$Reps == "ML", "ML-633", "WF"))))

    ps_alpha_barplot$ASV <- sub(".*:", "", ps_alpha_barplot$OTU)  
    ps_alpha_barplot$ASV <- sub('\\.', ' ', ps_alpha_barplot$ASV)
    #print(ps_alpha_barplot[grep("Megaira", ps_alpha_barplot$ASV),]$ASV)
    
    ############################
    #Create TotalAbundance Data#
    ############################
    phylum_abundance <- ps_alpha_barplot %>%
      dplyr::group_by(.data[[TaxLevel]]) %>%
      dplyr::summarise(TotalAbundance = sum(Abundance))
    ordered_levels <- phylum_abundance %>%
      dplyr::arrange(desc(TotalAbundance)) %>%
      pull(.data[[TaxLevel]])
    ps_alpha_barplot$Taxa <- factor(ps_alpha_barplot[[TaxLevel]], levels = ordered_levels)
    
    ##########################
    #Subset for Interest ASVs#
    ##########################
    #df <- ps_alpha_barplot #[ps_alpha_barplot$OTU %in% GroupOfInterest,] #Normal Alpha
    df <- ps_alpha_barplot[ps_alpha_barplot$OTU %in% GroupOfInterest,] #Core Alpha 
    df$Taxa <- factor(df$Taxa, levels = ordered_levels[ordered_levels %in% df$Taxa])
    
    ######################
    #Define Phylum Colors#
    ######################
    phylum_colors <- c(
          "Actinobacteriota"  = "red", 
          "Cyanobacteria"     = "green", 
          "Bacteroidota"      = "darkgreen",
          "Proteobacteria1"    = "darkorange2", 
          "Proteobacteria2"    = "#663300",
          #"Patescibacteria"  = "pink", 
          "Acidobacteriota "  = "#FF00CC",
          "Planctomycetota"   = "purple", 
          "Verrucomicrobiota" = "blue", 
          "Chloroflexota"     = "cyan",
          "Deinococcota"      = "#FFFF00",
          "Gemmatimonadota"   = "#8F7C00",  
          "Other" = "grey20")
       generate_shades <- function(base_color, num_variations) {
        color_ramp <- colorRampPalette(c(base_color, "white"))
        # Generate a vector of colors
        colors <- color_ramp(num_variations+1)
        # Create a data frame with the colors
        color_palette <- colors[1:(length(colors) - 1)]
      return(color_palette)}

    ##################################################
    #Create Color Dataframe for Taxa from PhylaColors#
    ##################################################
    taxonomy_df <- df[c("Phylum", "Taxa")]
    #taxonomy_df$ASV <- sub(".*:", "", taxonomy_df$OTU)
    taxonomy_df <- taxonomy_df[!duplicated(taxonomy_df$Taxa),]
    #############################################
    #Order to ordered_levels from Total Abudance#
    order_index <- match(ordered_levels[ordered_levels %in% taxonomy_df$Taxa], taxonomy_df$Taxa)
    taxonomy_df <- taxonomy_df[order_index, ]
    print(head(taxonomy_df[c("Phylum", "Taxa")]))
    
    taxonomy_df$Phylum2 <- ifelse(taxonomy_df$Phylum == "Proteobacteria",
              paste0("Proteobacteria", ave(seq_along(taxonomy_df$Phylum), 
              taxonomy_df$Phylum, FUN = function(x) ifelse(seq_along(x) %% 2 == 0, 2, 1))),
                               taxonomy_df$Phylum)
    
    unique_phyla <- unique(taxonomy_df$Phylum2)
      # Assign base colors to unique phyla
      #phylum_colors <- setNames(base_colors[1:length(unique_phyla)], unique_phyla)
      # Print the mapping of phyla to colors
      phylum_cols<- as.data.frame(phylum_colors)
      phylum_cols$Phyla <- rownames(phylum_cols)

    colored_taxonomy <- data.frame(
      Taxa = taxonomy_df$Taxa,
      Phylum2 = taxonomy_df$Phylum2,
      Color = character(length(taxonomy_df$Taxa)),  # Initialize an empty character vector
      stringsAsFactors = FALSE)
  
  
    # Generate shades and assign colors to each taxon
    for (ii in seq_along(unique_phyla)) {
      phylum <- unique_phyla[ii]
      
      if (phylum %in% names(phylum_colors)) {
         base_color <- phylum_colors[[phylum]]
        } else {
        base_color <- phylum_colors[["Other"]]
        }
      taxon_indices <- taxonomy_df$Phylum2 == phylum
      num_variations <- sum(taxon_indices)
      shades <- generate_shades(base_color, pmin(num_variations, 5))
      
      #colored_taxonomy$Color[taxon_indices] <- shades 
      colored_taxonomy$Color[which(taxon_indices)[1:pmin(num_variations, 5)]]<- shades
    }
    
    colored_taxonomy$Color <- gsub("^$", NA, trimws(colored_taxonomy$Color))
    colored_taxonomy$Color <- replace_na(colored_taxonomy$Color, "grey50")
    ##################
    # #Some adjustments#
    
    # colored_taxonomy[colored_taxonomy$Taxa == "Polynucleobacter",]$Color <- "#FF6600"
    # colored_taxonomy[colored_taxonomy$Taxa == "Photobacterium",]$Color <- "darkorange3"
    # colored_taxonomy[colored_taxonomy$Taxa == "Vibrio",]$Color <- "#FF9966"
    # colored_taxonomy[colored_taxonomy$Taxa == "Citrobacter",]$Color <- "#FFCC00"
    # colored_taxonomy[colored_taxonomy$Taxa == "Enterobacter",]$Color <-"#993300"
    # colored_taxonomy[colored_taxonomy$Taxa == "Acinetobacter",]$Color <-"#FF9933"
    #colored_taxonomy[colored_taxonomy$Taxa == "Psychrobacter",]$Color <-"#FF9933"
    # 
      Alpha_Diversity_df <- left_join(df, colored_taxonomy)
    color_mapping <- setNames(Alpha_Diversity_df$Color, Alpha_Diversity_df$Taxa)
      Alpha_Diversity_df <<- Alpha_Diversity_df
 # levels(Alpha_Diversity_df$Taxa)[!levels(Alpha_Diversity_df$Taxa) %in% ordered_levels[1:34]] <-
 #      "Other"

        p <- ggplot(Alpha_Diversity_df, 
        aes(x = SampleID, y = Abundance, fill = factor(Taxa))) + 
        geom_bar(stat = "identity") +
        facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x") +
        atheme2 +
        ylab("Relative Abundance [%] \n") + xlab("Sample") + 
        labs(fill="") +
        scale_fill_manual(values = color_mapping) +
        #ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
        #inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
      #awhite + 
        theme(strip.text.y = element_text(angle = 0))  +
        #theme(axis.text.x=element_blank(),
        #axis.text.y=element_blank(),
        #axis.title.y.left = element_blank(),
        #axis.ticks.y =element_blank() 
        #) +
        theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=9), 
        legend.text=element_text(size=9), 
        axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
        strip.text.y.left = element_text(size = 9,face = "bold"), 
        axis.title.y.left = element_text(size=12,face = "bold"))
      p
      g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(c(col.Palette[[gg]][length(col.Palette[[gg]])], col.Palette[[gg]][-length(col.Palette[[gg]])]), 0.8)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}

    Core_Alpha_Diversity_BarPlot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
    Core_Alpha_Diversity_BarPlot <- plot_grid(Core_Alpha_Diversity_BarPlot, ncol = 1, rel_heights = c(100))
    Core_Alpha_Diversity_BarPlot
    ggsave(Core_Alpha_Diversity_BarPlot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 9,
    height = 5)
}
## [1] "ps_SLWF_21"
##               Phylum                  Taxa
## 3       Bacteroidota       Elizabethkingia
## 33    Proteobacteria          Enterobacter
## 35    Proteobacteria           Citrobacter
## 40      Bacteroidota      Chryseobacterium
## 31  Actinobacteriota CL500-29 marine group
## 166   Proteobacteria           Pseudomonas
#Core_Alpha_Diversity_BarPlot
4.4.2 Rel.Change in CoreMicrobiome vs Non Core
##################################################################
#Analyse Normalized Relative Abundance Changes in the Microbiome#
##################################################################
    RelCoreAbundance <- Alpha_Diversity_df[c("Sample", "Abundance", "Reps", "Taxa", "OTU", "Phylum", "Color")]
    RelCoreAbundance <- RelCoreAbundance[RelCoreAbundance$OTU %in% GroupOfInterest, ]
    RelCoreAbundance_AvgAbun <-  RelCoreAbundance %>% dplyr::group_by(Reps, Taxa) %>%  
      dplyr::summarize(AvgAbun = mean(Abundance))

    RelCoreAbundance_AvgAbun %>% dplyr::group_by(Reps)
## # A tibble: 100 × 3
## # Groups:   Reps [5]
##    Reps   Taxa                  AvgAbun
##    <chr>  <fct>                   <dbl>
##  1 BB-692 Elizabethkingia        33.1  
##  2 BB-692 Enterobacter           10.2  
##  3 BB-692 Citrobacter             9.57 
##  4 BB-692 Chryseobacterium        1.24 
##  5 BB-692 CL500-29 marine group   0.253
##  6 BB-692 Pseudomonas             1.20 
##  7 BB-692 Microvirgula            1.99 
##  8 BB-692 Deinococcus             0.992
##  9 BB-692 Lelliottia              1.91 
## 10 BB-692 Asinibacterium          1.07 
## # ℹ 90 more rows
    RelCoreAbundance_normalized <- RelCoreAbundance_AvgAbun %>%
      dplyr::group_by(Reps) %>%
      dplyr::mutate(NormalizedAbun = AvgAbun / sum(AvgAbun)*100) %>%
      dplyr::ungroup()

  RelCoreAbundance_normalized <- left_join(RelCoreAbundance_normalized, 
      RelCoreAbundance[c("Taxa", "Phylum", "Color")][!duplicated(RelCoreAbundance$Taxa),])
  
  RelCoreAbundance_normalized %>%
  dplyr::group_by(Reps, Phylum) %>%
  dplyr::summarise(SumNormalizedAbun = sum(NormalizedAbun))
## # A tibble: 20 × 3
## # Groups:   Reps [5]
##    Reps   Phylum           SumNormalizedAbun
##    <chr>  <chr>                        <dbl>
##  1 BB-692 Actinobacteriota            2.31  
##  2 BB-692 Bacteroidota               54.8   
##  3 BB-692 Deinococcota                1.53  
##  4 BB-692 Proteobacteria             41.3   
##  5 MG-713 Actinobacteriota            2.82  
##  6 MG-713 Bacteroidota               50.7   
##  7 MG-713 Deinococcota                1.19  
##  8 MG-713 Proteobacteria             45.3   
##  9 ML-633 Actinobacteriota            2.29  
## 10 ML-633 Bacteroidota               60.8   
## 11 ML-633 Deinococcota                1.64  
## 12 ML-633 Proteobacteria             35.3   
## 13 SS-665 Actinobacteriota            3.14  
## 14 SS-665 Bacteroidota               59.0   
## 15 SS-665 Deinococcota                5.63  
## 16 SS-665 Proteobacteria             32.2   
## 17 WF     Actinobacteriota           97.4   
## 18 WF     Bacteroidota                0.961 
## 19 WF     Deinococcota                0.0166
## 20 WF     Proteobacteria              1.60
  RelCoreAbundance_normalized %>%
  dplyr::group_by(Reps) %>%
  dplyr::summarise(SumAvgAbun = sum(AvgAbun))
## # A tibble: 5 × 2
##   Reps   SumAvgAbun
##   <chr>       <dbl>
## 1 BB-692       64.9
## 2 MG-713       56.1
## 3 ML-633       27.9
## 4 SS-665       57.4
## 5 WF           15.0
  FILENAME    <- paste(paste(save_name, "Alpha_CoreTaxa_RelCore_Publication", TaxLevel, gg, sep="_"), ".png", sep="")
  RelCoreAbundance_normalized_barplot <- ggplot(RelCoreAbundance_normalized, 
        aes(x = Reps, y = NormalizedAbun, fill = factor(Taxa))) + 
        geom_bar(stat = "identity") +
        facet_grid(. ~ factor(Reps, levels = RepOrder), drop = TRUE, scale = "free", space = "free_x") +
        atheme2 +
        ylab("Relative Abundance [%] \n") + xlab("") + 
        labs(fill="") +
        scale_fill_manual(values = color_mapping) +
        #ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
        #inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
      #awhite + 
        theme(strip.text.y = element_text(angle = 0))  +
        #theme(axis.text.x=element_blank(),
        #axis.text.y=element_blank(),
        #axis.title.y.left = element_blank(),
        #axis.ticks.y =element_blank() 
        #) +
        theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=9), 
        legend.text=element_text(size=9), 
        axis.text.x.bottom = element_text(size= 9, face = "bold", angle = 45, hjust = 1),
        strip.text.y.left = element_text(size = 9,face = "bold"), 
        axis.title.y.left = element_text(size=12,face = "bold")) +
        theme(legend.position = "none") 
      #RelCoreAbundance_normalized_barplot
        
        g <- ggplot_gtable(ggplot_build(RelCoreAbundance_normalized_barplot ))
          stripr <- which(grepl('strip-t', g$layout$name))
          fills <- alpha(col.Palette[[gg]], 0.7)
          k <- 1
          for (iii in stripr) {
          j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
          g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
          k <- k+1}
        
    RelCoreAbundance_normalized_barplot <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
    RelCoreAbundance_normalized_barplot<- plot_grid(RelCoreAbundance_normalized_barplot, ncol = 1, rel_heights = c(100))
    ggsave(RelCoreAbundance_normalized_barplot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 4,
    height = 4)

  ##############
  #Summary Plot#
  ##############
cowplot::plot_grid(Core_Alpha_Diversity_BarPlot, RelCoreAbundance_normalized_barplot, labels = c("A", "B"), ncol = 2, rel_widths = c(1,0.5)) -> Core_Microbiome_plot
    
ggsave(Core_Microbiome_plot, filename = paste(paste(save_name,"Core_Microbiome_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10, height = 5)

Core_Microbiome_plot

4.5 Alpha Metrics & Stats

##############################################
#Get shannon for all samples and group means:#
data_shannon <-as.data.frame(vegan::diversity(otu_table(pslist$ps_SLWF_21), index = "shannon"))
names(data_shannon) <- "Shannon"
replicates <- c("SLSU21MG", "SLSU21BB", "SLSU21SS", "SLSU21ML", "WF")
mean_Shannon <- data.frame(Replicate = character(), Mean_Shannon = numeric(), stringsAsFactors = FALSE)
for (replicate in replicates) {
    subset_rows <- grep(paste0("^", replicate), rownames(data_shannon))
    mean_value <- mean(data_shannon[subset_rows, "Shannon"])
    mean_Shannon <- rbind(mean_Shannon, data.frame(Replicate = replicate, Mean_Shannon = mean_value))
}
mean_Shannon
##   Replicate Mean_Shannon
## 1  SLSU21MG     3.946707
## 2  SLSU21BB     3.611333
## 3  SLSU21SS     4.336690
## 4  SLSU21ML     4.007143
## 5        WF     5.067468
data_Richness <-as.data.frame(vegan::estimateR(otu_table(pslist$ps_SLWF_21)))
data_Richness <- as.data.frame(t(data_Richness[1,]))
names(data_Richness) <- "Obs.Richness"
replicates <- c("SLSU21MG", "SLSU21BB", "SLSU21SS", "SLSU21ML", "WF")
mean_Richness <- data.frame(Replicate = character(), Mean_Obs.Richness = numeric(), stringsAsFactors = FALSE)
for (replicate in replicates) {
    subset_rows <- grep(paste0("^", replicate), rownames(data_Richness))
    mean_value <- mean(data_Richness[subset_rows, "Obs.Richness"])
    mean_Richness <- rbind(mean_Richness, data.frame(Replicate = replicate, Mean_Obs.Richness = mean_value))
}
mean_Richness
##   Replicate Mean_Obs.Richness
## 1  SLSU21MG          364.3333
## 2  SLSU21BB          404.8333
## 3  SLSU21SS          454.3333
## 4  SLSU21ML          501.8333
## 5        WF          516.7500
#################################
#Creating Stats over all Samples#
#################################
library(phyloseq)
library(tidyverse)
merged_reps <- merge_samples(pslist$ps_SLWF_21, "Replicate2")
# Use psmelt to obtain a long-format data.frame
merged_reps_Order <- merged_reps %>% tax_glom(taxrank = "Phylum") %>% 
  transform_sample_counts(function(x) {x/sum(x)}) %>% 
  psmelt() %>% 
  dplyr::filter(Abundance > 0.01) %>%                          
  dplyr::arrange("Phylum")    

B<- as.data.frame(merged_reps_Order %>%
  dplyr::select(Sample, SampleID,  Phylum, Abundance) %>%
  dplyr::group_by(Phylum, Sample) %>%
  dplyr::summarise(TotalAbundance = (sum(Abundance))*100)) 

#Relative Abundance of Phyla: 
B %>%
  group_by(Sample) %>%
  pivot_wider(names_from = Phylum, values_from = TotalAbundance) %>% as.data.frame()
##     Sample Acidobacteriota Actinobacteriota Bacteroidota Cyanobacteria
## 1       WF        2.153687        24.800952     15.20393      4.212806
## 2 SLSU21BB              NA         1.647843     20.15724            NA
## 3 SLSU21MG              NA         2.708390     21.43653            NA
## 4 SLSU21ML              NA         1.931500     14.38299            NA
## 5 SLSU21SS              NA         6.002329     32.18860            NA
##   Deinococcota Desulfobacterota Firmicutes Nitrospirota Planctomycetota
## 1           NA               NA         NA     2.882767        1.217168
## 2           NA               NA  22.417501           NA              NA
## 3           NA          1.06091   6.183967           NA              NA
## 4           NA               NA         NA           NA              NA
## 5     2.876533               NA         NA           NA              NA
##   Proteobacteria Verrucomicrobiota
## 1       38.02813          8.209024
## 2       52.98235                NA
## 3       62.32015          3.075763
## 4       77.68180          3.113886
## 5       54.31298          1.940432
#RatioProteobacteria_Bacteroidota
Proteobacteria_Bacteroidota_ratio <- B %>%
  group_by(Sample) %>%
  filter(Phylum %in% c("Proteobacteria", "Bacteroidota")) %>%
  pivot_wider(names_from = Phylum, values_from = TotalAbundance) %>%
  mutate(Ratio = Bacteroidota / Proteobacteria *100)
Proteobacteria_Bacteroidota_ratio
## # A tibble: 5 × 4
## # Groups:   Sample [5]
##   Sample   Bacteroidota Proteobacteria Ratio
##   <chr>           <dbl>          <dbl> <dbl>
## 1 SLSU21BB         20.2           53.0  38.0
## 2 SLSU21MG         21.4           62.3  34.4
## 3 SLSU21ML         14.4           77.7  18.5
## 4 SLSU21SS         32.2           54.3  59.3
## 5 WF               15.2           38.0  40.0
########
#Genera#
merged_reps <- merge_samples(pslist$ps_SL_21, "Replicates")
# Use psmelt to obtain a long-format data.frame
merged_reps_Genus <- merged_reps %>% tax_glom(taxrank = "Genus") %>% 
  transform_sample_counts(function(x) {x/sum(x)}) %>% 
  psmelt() %>% 
  dplyr::filter(Abundance > 0.01) %>%                          
  dplyr::arrange_("Genus")      
C<- as.data.frame(merged_reps_Genus %>%
  dplyr::select(Replicates, SampleID,  Genus, Abundance) %>%
  dplyr::group_by(Genus) %>%
  dplyr::summarise(TotalAbundance = (sum(Abundance)/4)*100)) %>%
  dplyr::arrange(desc(TotalAbundance))
#Overall Abundance
C
##                          Genus TotalAbundance
## 1              Elizabethkingia     17.7687334
## 2               Photobacterium     12.8560753
## 3             Polynucleobacter      9.6529027
## 4  Clostridium sensu stricto 1      7.6078737
## 5                 Enterobacter      5.6999721
## 6                  Citrobacter      5.2363582
## 7                Acinetobacter      4.4986099
## 8             Chryseobacterium      4.2442596
## 9                Psychrobacter      2.6683610
## 10                 Verticiella      2.1025235
## 11                 Pseudomonas      1.0920346
## 12                Microvirgula      1.0241103
## 13                  Shewanella      0.8970330
## 14                 Deinococcus      0.8588718
## 15                  Lelliottia      0.8323651
## 16              Flavobacterium      0.6897000
## 17                 Halioglobus      0.6623356
## 18               Luteolibacter      0.5855554
## 19              Persicirhabdus      0.5634031
## 20                Arthrobacter      0.5611946
## 21          Candidatus Megaira      0.5507169
## 22                 Caedibacter      0.5446414
## 23                      Vibrio      0.5433777
## 24                  Paracoccus      0.5351682
## 25              Asinibacterium      0.4558443
## 26              Terrimicrobium      0.4370458
## 27                   Aeromonas      0.3645315
## 28            Ornithobacterium      0.3125881
## 29                Alkanindiges      0.3066237
## 30        Paeniglutamicibacter      0.2596797
## 31             Ornithinicoccus      0.2513176
## 32               Ilumatobacter      0.2501617
#Abundance per Sampling Group
D<- as.data.frame(merged_reps_Genus %>%
  dplyr::select(Sample,  Genus, Abundance) %>%
  dplyr::group_by(Genus, Sample) %>%
  dplyr::summarise(AbundancePerSamplingGroup = (sum(Abundance))*100)) 
D
##                          Genus   Sample AbundancePerSamplingGroup
## 1                Acinetobacter SLSU21ML                 15.270341
## 2                Acinetobacter SLSU21SS                  2.724098
## 3                    Aeromonas SLSU21ML                  1.458126
## 4                 Alkanindiges SLSU21SS                  1.226495
## 5                 Arthrobacter SLSU21SS                  2.244778
## 6               Asinibacterium SLSU21MG                  1.823377
## 7                  Caedibacter SLSU21ML                  2.178566
## 8           Candidatus Megaira SLSU21ML                  2.202868
## 9             Chryseobacterium SLSU21MG                  1.132198
## 10            Chryseobacterium SLSU21ML                  6.280133
## 11            Chryseobacterium SLSU21SS                  9.564708
## 12                 Citrobacter SLSU21BB                  5.370691
## 13                 Citrobacter SLSU21MG                  7.429372
## 14                 Citrobacter SLSU21ML                  2.121338
## 15                 Citrobacter SLSU21SS                  6.024031
## 16 Clostridium sensu stricto 1 SLSU21BB                 24.309002
## 17 Clostridium sensu stricto 1 SLSU21MG                  6.122493
## 18                 Deinococcus SLSU21SS                  3.435487
## 19             Elizabethkingia SLSU21BB                 20.489376
## 20             Elizabethkingia SLSU21MG                 21.004960
## 21             Elizabethkingia SLSU21ML                  8.347379
## 22             Elizabethkingia SLSU21SS                 21.233218
## 23                Enterobacter SLSU21BB                  5.710513
## 24                Enterobacter SLSU21MG                  8.306017
## 25                Enterobacter SLSU21ML                  2.358088
## 26                Enterobacter SLSU21SS                  6.425272
## 27              Flavobacterium SLSU21SS                  2.758800
## 28                 Halioglobus SLSU21MG                  2.649342
## 29               Ilumatobacter SLSU21MG                  1.000647
## 30                  Lelliottia SLSU21BB                  1.023670
## 31                  Lelliottia SLSU21MG                  1.281001
## 32                  Lelliottia SLSU21SS                  1.024790
## 33               Luteolibacter SLSU21ML                  1.052830
## 34               Luteolibacter SLSU21SS                  1.289392
## 35                Microvirgula SLSU21BB                  1.244049
## 36                Microvirgula SLSU21MG                  1.653008
## 37                Microvirgula SLSU21SS                  1.199384
## 38             Ornithinicoccus SLSU21SS                  1.005270
## 39            Ornithobacterium SLSU21SS                  1.250352
## 40        Paeniglutamicibacter SLSU21ML                  1.038719
## 41                  Paracoccus SLSU21SS                  2.140673
## 42              Persicirhabdus SLSU21MG                  2.253612
## 43              Photobacterium SLSU21BB                 25.443702
## 44              Photobacterium SLSU21MG                 24.952556
## 45              Photobacterium SLSU21SS                  1.028043
## 46            Polynucleobacter SLSU21ML                 30.185558
## 47            Polynucleobacter SLSU21SS                  8.426052
## 48                 Pseudomonas SLSU21MG                  1.152685
## 49                 Pseudomonas SLSU21ML                  2.101740
## 50                 Pseudomonas SLSU21SS                  1.113714
## 51               Psychrobacter SLSU21ML                  1.948088
## 52               Psychrobacter SLSU21SS                  8.725356
## 53                  Shewanella SLSU21MG                  1.281001
## 54                  Shewanella SLSU21ML                  2.307131
## 55              Terrimicrobium SLSU21ML                  1.748183
## 56                 Verticiella SLSU21ML                  8.410094
## 57                      Vibrio SLSU21BB                  2.173511

-

5 Beta Diversity

5.1 PCA

- Figure 4 D
##################
# SampleDist_PCAs# Publication 
##################
#pslist <- pslistraw
for (i in names(pslist)[grepl("TSEc.l.r_SLWF_21", names(pslist))]){
  require(plyr)
  require(ggrepel) 
  require(cowplot)
  require(DESeq2)
  require(matrixStats)
  TSE <- pslist[[i]]
  tryCatch({
  g       <- paste(i); print(i) 
  gg      <- sub('TSEc.l.r_', '', g)
  
  A <- pcaplotRK_publication(TSE,intgroup = c("Replicate2"), pcX = 1, pcY = 2, title="",ellipse = TRUE,     ellipse.prob = 0.5, group_order = col.Palette$SL) +
    scale_fill_manual(values=col.Palette$SL) +
    scale_color_manual(values=col.Palette$SL) + 
    atheme +
  
  theme(
         panel.background = element_rect(fill='transparent'), #transparent panel bg
         plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
         #legend.background = element_rect(fill='transparent'), #transparent legend bg
         #legend.box.background = element_rect(fill='transparent') #transparent legend panel
         ) +
      theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold")) +
      theme(
        legend.title=element_text(size=8), 
        legend.text=element_text(size=8)) +
    theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))

  SampleDist_PCA <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
  
  A <- plot_grid(SampleDist_PCA, ncol = 1)
   plot(A)
  ggsave(A, filename = paste(save_name, gg, "clrPCA.png"), path = pathPlots, device='png', dpi=300, width = 6,
  height = 5)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
## [1] "TSEc.l.r_SLWF_21"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."

5.2 PERMANOVA

https://www.nicholas-ollberding.com/post/introduction-to-the-statistical-analysis-of-microbiome-data-in-r/ While PCA is an exploratory data visualization tool, we can test whether the samples cluster beyond that expected by sampling variability using permutational multivariate analysis of variance (PERMANOVA). It does this by partitioning the sums of squares for the within- and between-cluster components using the concept of centroids. Many permutations of the data (i.e. random shuffling) are used to generate the null distribution. The test from ADONIS can be confounded by differences in dispersion (or spread)…so we want to check this as well.

#Generate distance matrix
ps_clr <- microbiome::transform(pslist$ps_SLWF_21, "clr")
#Generate distance matrix
clr_dist_matrix <- phyloseq::distance(ps_clr, method = "euclidean") 
#ADONIS test
vegan::adonis2(clr_dist_matrix ~ phyloseq::sample_data(pslist$ps_SLWF_21)$Kind)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = clr_dist_matrix ~ phyloseq::sample_data(pslist$ps_SLWF_21)$Kind)
##                                               Df SumOfSqs      R2      F Pr(>F)
## phyloseq::sample_data(pslist$ps_SLWF_21)$Kind  1    28212 0.22547 7.5686  0.001
## Residual                                      26    96915 0.77453              
## Total                                         27   125127 1.00000              
##                                                  
## phyloseq::sample_data(pslist$ps_SLWF_21)$Kind ***
## Residual                                         
## Total                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Dispersion test and plot
dispr <- vegan::betadisper(clr_dist_matrix, phyloseq::sample_data(pslist$ps_SLWF_21)$Kind)
dispr
## 
##  Homogeneity of multivariate dispersions
## 
## Call: vegan::betadisper(d = clr_dist_matrix, group =
## phyloseq::sample_data(pslist$ps_SLWF_21)$Kind)
## 
## No. of Positive Eigenvalues: 27
## No. of Negative Eigenvalues: 0
## 
## Average distance to median:
##  Fish Water 
## 60.15 47.71 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 27 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8 
## 30306 20241  8563  5796  5131  4614  4194  3936
plot(dispr, main = "Ordination Centroids and Dispersion Labeled: Aitchison Distance", sub = "")

boxplot(dispr, main = "", xlab = "")

vegan::permutest(dispr)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq Mean Sq      F N.Perm Pr(>F)   
## Groups     1  530.99  530.99 12.166    999  0.002 **
## Residuals 26 1134.81   43.65                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(vegan::betadisper(clr_dist_matrix, phyloseq::sample_data(pslist$ps_SLWF_21)$Kind))
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## Groups     1  530.99  530.99  12.166 0.001749 **
## Residuals 26 1134.81   43.65                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Cant trust Adonis

##################
#For Fish vs Fish#
##################
#Pairwise Test
set.seed(123)
library(vegan)
ps_clr <- microbiome::transform(pslist$ps_SL_21, "clr")
clr_dist_matrix <- phyloseq::distance(ps_clr, method ="euclidean")
vegan::adonis2(clr_dist_matrix ~ phyloseq::sample_data(ps_clr)$Loc)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## vegan::adonis2(formula = clr_dist_matrix ~ phyloseq::sample_data(ps_clr)$Loc)
##                                   Df SumOfSqs      R2      F Pr(>F)    
## phyloseq::sample_data(ps_clr)$Loc  3    29011 0.34768 3.5533  0.001 ***
## Residual                          20    54431 0.65232                  
## Total                             23    83442 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispr <- vegan::betadisper(clr_dist_matrix, phyloseq::sample_data(pslist$ps_SL_21)$Loc)
dispr
## 
##  Homogeneity of multivariate dispersions
## 
## Call: vegan::betadisper(d = clr_dist_matrix, group =
## phyloseq::sample_data(pslist$ps_SL_21)$Loc)
## 
## No. of Positive Eigenvalues: 23
## No. of Negative Eigenvalues: 0
## 
## Average distance to median:
##       Brunsbuettel        Medem Grund Muehlenberger Loch Schwarztonnen Sand 
##              46.20              50.90              46.13              46.33 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 23 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8 
## 18637  9136  5406  4413  4185  3906  3480  3070
plot(dispr, main = "Ordination Centroids and Dispersion Labeled: Aitchison Distance", sub = "")

boxplot(dispr, main = "", xlab = "")

vegan::permutest(dispr)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df Sum Sq Mean Sq      F N.Perm Pr(>F)
## Groups     3  98.55  32.850 1.1638    999  0.345
## Residuals 20 564.51  28.226
anova(vegan::betadisper(clr_dist_matrix, phyloseq::sample_data(pslist$ps_SL_21)$Loc))
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     3  98.55  32.850  1.1638 0.3482
## Residuals 20 564.51  28.226
#Trust Adonis
pairwise.adonis(clr_dist_matrix, phyloseq::sample_data(ps_clr)$Loc)
##                                      pairs Df SumsOfSqs  F.Model        R2
## 1              Brunsbuettel vs Medem Grund  1  4108.699 1.440065 0.1258791
## 2       Brunsbuettel vs Muehlenberger Loch  1 12716.634 4.913451 0.3294644
## 3       Brunsbuettel vs Schwarztonnen Sand  1  7298.863 2.820892 0.2200231
## 4        Medem Grund vs Muehlenberger Loch  1 15061.548 5.274278 0.3453046
## 5        Medem Grund vs Schwarztonnen Sand  1 10402.206 3.643551 0.2670530
## 6 Muehlenberger Loch vs Schwarztonnen Sand  1  8434.257 3.256524 0.2456545
##   p.value p.adjusted sig
## 1   0.002      0.012   .
## 2   0.005      0.030   .
## 3   0.002      0.012   .
## 4   0.005      0.030   .
## 5   0.007      0.042   .
## 6   0.005      0.030   .
pairwise.adonis2(clr_dist_matrix ~ Loc, data=data.frame(phyloseq::sample_data(pslist$ps_SL_21)))
## $parent_call
## [1] "clr_dist_matrix ~ Loc , strata = Null , permutations 999"
## 
## $`Brunsbuettel_vs_Medem Grund`
##          Df SumOfSqs      R2      F Pr(>F)   
## Loc       1     4109 0.12588 1.4401  0.004 **
## Residual 10    28531 0.87412                 
## Total    11    32640 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Brunsbuettel_vs_Muehlenberger Loch`
##          Df SumOfSqs      R2      F Pr(>F)    
## Loc       1    12717 0.32946 4.9135  0.001 ***
## Residual 10    25881 0.67054                  
## Total    11    38598 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Brunsbuettel_vs_Schwarztonnen Sand`
##          Df SumOfSqs      R2      F Pr(>F)   
## Loc       1     7299 0.22002 2.8209  0.002 **
## Residual 10    25874 0.77998                 
## Total    11    33173 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Medem Grund_vs_Muehlenberger Loch`
##          Df SumOfSqs     R2      F Pr(>F)   
## Loc       1    15062 0.3453 5.2743  0.002 **
## Residual 10    28557 0.6547                 
## Total    11    43618 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Medem Grund_vs_Schwarztonnen Sand`
##          Df SumOfSqs      R2      F Pr(>F)   
## Loc       1    10402 0.26705 3.6436  0.004 **
## Residual 10    28550 0.73295                 
## Total    11    38952 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Muehlenberger Loch_vs_Schwarztonnen Sand`
##          Df SumOfSqs      R2      F Pr(>F)    
## Loc       1     8434 0.24565 3.2565  0.001 ***
## Residual 10    25900 0.75435                  
## Total    11    34334 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"

Different approach via vst transformed data https://astrobiomike.github.io/amplicon/dada2_workflow_ex

#Fish vs Water
deseq_counts <- DESeqDataSetFromMatrix(as.data.frame(t(otu_table(pslist$ps_SLWF_21))), colData = phyloseq::sample_data(pslist$ps_SLWF_21), design = ~Kind)
deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts)
vst_trans_count_tab <- assay(deseq_counts_vst)
euc_dist <- dist(t(vst_trans_count_tab))
anova(vegan::betadisper(euc_dist, phyloseq::sample_data(pslist$ps_SLWF_21)$Kind)) 
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## Groups     1  7394.7  7394.7    9.53 0.004759 **
## Residuals 26 20174.4   775.9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Fish vs Fish
deseq_counts <- DESeqDataSetFromMatrix(as.data.frame(t(otu_table(pslist$ps_SL_21))), colData = phyloseq::sample_data(pslist$ps_SL_21), design = ~Loc)
deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts)
vst_trans_count_tab <- assay(deseq_counts_vst)
euc_dist <- dist(t(vst_trans_count_tab))
anova(vegan::betadisper(euc_dist, phyloseq::sample_data(pslist$ps_SL_21)$Loc)) 
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     3 1056.8  352.27  1.9636  0.152
## Residuals 20 3588.0  179.40

Checking by all sample types, we get a significant result (0.002) from the betadisper test. This tells us that there is a difference between group dispersions, which means that we can’t trust the results of an adonis (permutational anova) test on this, because the assumption of homogenous within-group disperions is not met. This isn’t all that surprising considering how different the water and biofilm samples are from the rocks.

An important note: An NMDS is just a visualization technique, and is not a statistical assessment of sample separation or correlation. For this you should run a statistical test, like an ANOSIM test for categorical variables, or a Mantel test for continuous variables. Other ordination techniques like PCA, CA, CCA, RDA, etc, may be more useful to you if you want your axes to be meaningful, or if you want to talk about variation partitioning.

Anosim

#Generate distance matrix
ps_clr <- microbiome::transform(pslist$ps_SLWF_21, "clr")
clr_dist_matrix <- phyloseq::distance(ps_clr, method = "euclidean") 
vegan::anosim(clr_dist_matrix, phyloseq::sample_data(pslist$ps_SLWF_21)$Kind, distance = "euclidean")
## 
## Call:
## vegan::anosim(x = clr_dist_matrix, grouping = phyloseq::sample_data(pslist$ps_SLWF_21)$Kind,      distance = "euclidean") 
## Dissimilarity: euclidean 
## 
## ANOSIM statistic R: 0.9867 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999

-

6 Export ASV & Core Dataframe

#############################################
#Create Bacterial Counts and Reseq Dataframe#
#############################################  
SL_SSU_Samples
##  [1] "WFSU21MGEB"  "WFSU21BBEB"  "WFSU21SSFL"  "WFSU21MLFL"  "SLSU21MGEB1"
##  [6] "SLSU21MGEB2" "SLSU21MGEB3" "SLSU21MGEB4" "SLSU21MGEB5" "SLSU21MGEB7"
## [11] "SLSU21BBEB1" "SLSU21BBEB2" "SLSU21BBEB4" "SLSU21BBEB6" "SLSU21BBEB7"
## [16] "SLSU21BBEB9" "SLSU21SSEB2" "SLSU21SSEB3" "SLSU21SSEB5" "SLSU21SSEB6"
## [21] "SLSU21SSEB7" "SLSU21SSEB9" "SLSU21MLEB1" "SLSU21MLEB2" "SLSU21MLEB5"
## [26] "SLSU21MLEB6" "SLSU21MLEB7" "SLSU21MLEB9"
TAX <- as.data.frame(tax_table(pslist$ps_SLWF_21))
OTU <- as.data.frame(t(otu_table(pslist$ps_SLWF_21)))
OTU$ASV <- rownames(OTU)
OTU<- OTU[,match(c(SL_SSU_Samples, "ASV"), colnames(OTU))] #reorder Colnames
REFSEQ <- refseq(pslist$ps_SLWF_21)
REFSEQ <- stack(as.character(REFSEQ, use.names=TRUE))
colnames(REFSEQ)[colnames(REFSEQ) == "ind"] <- "ASV"
SSUData <- TAX %>% rownames_to_column("RowName") %>%
          left_join(OTU %>% rownames_to_column("RowName"), by = "RowName") %>%
          column_to_rownames("RowName")
SSUData  <- left_join(SSUData , REFSEQ)
rownames(SSUData) <- SSUData$ASV
#Count species per Sample
head(ddply(SSUData ,.(ASV),numcolwise(sum)))
##                         ASV WFSU21MGEB WFSU21BBEB WFSU21SSFL WFSU21MLFL
## 1      ASV1:Elizabethkingia          0          0          8         14
## 2        ASV10:Microvirgula          0          0          0          1
## 3 ASV1000:Methyloparacoccus          3          2          3         32
## 4            ASV1001:Afipia          0          0          0          0
## 5    ASV1004:Reyranellaceae          0          0          1         11
## 6         ASV1008:Aquicella          1          2          3          1
##   SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 SLSU21MGEB7
## 1         283        3032        4830        2895        2611        4852
## 2           4         195         465         184         174         369
## 3           0           0           0           0           0           0
## 4           0           0          18           8           2           0
## 5           0           0           0           0           0           0
## 6           1           0           0           0           8          18
##   SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 SLSU21BBEB7 SLSU21BBEB9
## 1        4947        5776        1964        2071        2759        3806
## 2         264         541         125         168         155         226
## 3           0           0           0           0           0           0
## 4           7          14           1           8           5           3
## 5           0           0           4           0           1           0
## 6           2           6          10           0           8          38
##   SLSU21SSEB2 SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 SLSU21SSEB9
## 1        2244        3614        1508        3991        4248        3054
## 2         175         230          94         137         267         149
## 3           0          14           0           2           0           0
## 4          14           0           6           0          13           1
## 5           0           5           0           0           0           0
## 6           0           3           4           0          16           4
##   SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21MLEB9
## 1        1252        1983        2515        2180        1205        1030
## 2          88          43          95          76         101          30
## 3          52          79          51         194           0          59
## 4           3           0           0           1           0           1
## 5           4           2           0           0           0           3
## 6           0           0           1           0           0           0
  ps_SLWF_21_relAbund <- pslist$ps_SLWF_21 %>%
        transform_sample_counts(function(x) {x/sum(x)*100})
  SSU_counts_rel <- as.data.frame(t(otu_table(ps_SLWF_21_relAbund))) 
  names(SSU_counts_rel) <- paste(names(SSU_counts_rel), "rel", sep="")
  SSU_counts_rel <- SSU_counts_rel %>%  mutate("ASV" = rownames(.))  %>% as.data.frame()
  SSU_counts_rel <- SSU_counts_rel[,match(c(paste(SL_SSU_Samples, "rel", sep=""), "ASV"),
                                         colnames(SSU_counts_rel))] #reorder Colnames
  SSU_counts_rel <- format(round(SSU_counts_rel[paste(SL_SSU_Samples, "rel", sep="")], 2), 
                           nsmall = 2)

  SSU_counts_rel[] <- sapply(SSU_counts_rel, as.numeric)
  SSU_counts_rel$relASVMeanAbundance <-  rowMeans(SSU_counts_rel)
  SSU_counts_rel <- SSU_counts_rel %>%  mutate("ASV" = rownames(.))  %>% as.data.frame()
  
  SSU_Data <- left_join(SSUData, SSU_counts_rel)
  SSU_Data <- SSU_Data %>% relocate(paste(SL_SSU_Samples, "rel", sep=""), .after=Species)
  SSU_Data <- SSU_Data %>% relocate(SL_SSU_Samples, .after=ASV)
  
  write.csv2(SSU_Data, file.path(path_Output_16S,paste(paste(save_name,"SL_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".csv", sep="")))
  
  write.csv2(SSU_Data[SSU_Data$ASV%in% SL_core_taxa_standard,], 
             file.path(path_Output_16S,paste(paste(save_name,"16S_merge_Filter_ASV_besttax_Core", Date, sep="_"), ".csv", sep="")))

#-

7 Differential Abundance Testing DAT

7.1 DESeq2

ddslist <- list()
for (i in names(pslist)[grepl("ps_SL_21|ps_SLWF_21", names(pslist))]){
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  require(phyloseq); require(DESeq2); require(tidyverse); require(plyr); require(dplyr)
  g <- paste(i); print(g)
  a <- length(ddslist)
  ps <- pslist[[i]]
  #sample_data(ps)[[variable]] <- as.factor(sample_data(ps)[[variable]])
  #dds <- phyloseq_to_deseq2(ps, as.formula(paste("~",variable)))
  #dds <- DESeq(dds)
  
  #  if (grepl("WF", i)) {
  #  VARIABLE <- "Kind"
  #} else {
  #  VARIABLE <- variable
  #}
  
  VARIABLE <- variable
  
    if (!taxa_are_rows(ps)) {
        ps <- t(ps)}
    countData = round(as(otu_table(ps), "matrix"), digits = 0)
    SAMDF<-SAMDF16S[rownames(SAMDF16S) %in% rownames(sample_data(ps)),]
    library(DESeq2)
    dds<- DESeqDataSetFromMatrix(countData =
        as.matrix(as.data.frame(countData)[rownames(SAMDF)]), colData = SAMDF,
        design =as.formula(paste("~",VARIABLE)))
    dds  <- DESeq(dds)
    vst   <- varianceStabilizingTransformation(dds)
  ddslist [[a+1]] <- ps
  names(ddslist )[[a+1]] <- paste("ps", sub("ps", "\\1", g), sep="")
  ddslist [[a+2]] <- dds
  names(ddslist )[[a+2]] <- paste("dds", sub("ps", "\\1", g), sep="")
  ddslist [[a+3]] <- vst
  names(ddslist )[[a+3]] <- paste("vst", sub("ps", "\\1", g), sep="")
}
## [1] "ps_SL_21"
## [1] "ps_SLWF_21"
for (i in names(ddslist)[grepl("dds", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  require(DESeq2); require(tidyverse); require(plyr); require(dplyr)
  tryCatch({
  a       <- length(ddslist)
  g       <- paste(i)
  gg      <- sub('....', '', g); print(gg)
  name2   <- ddslist[names(ddslist)[grepl(paste(gg), names(ddslist))]]
  VST     <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  SAMDF   <- data.frame(sample_data(name2[[names(name2)[grepl("ps", names(name2))]]]))
  
  ###############Selection of Comparisons################
  VariableOrder<-VariableOrder
  A <- as.data.frame(t(combn(SAMDF %>% 
  arrange(factor(Season, levels = VariableOrder)) %>% #Chane hard coded Variable name here! 
  pull(paste(variable)) %>% 
  unique(),2)))
  A$V3<-Reduce(function(...) paste(..., sep = "-"), A)
  #######################################################
  
  #  if (grepl("WF", i)) {
  #   VARIABLE <- "Kind"
  #   A <-data.frame(c("Fish"="Fish"), c("Water"="Water"), c("Fish-Water"= "Fish-Water"))
  # } else {
  #   VARIABLE <- variable
  # }
 
  VARIABLE <- variable
   
  mylist <- list() 
  for (i in 1:nrow(A)) {
  res <- results(ddslist[[g]], lfcThreshold = 0.0, alpha=0.05, contrast=c(VARIABLE,A[i,1],A[i,2]))
  mylist[[i]] <- res 
  names(mylist)[[i]] <- A[i,3]}
  
  mylist2 <-list()
  for (x in names(mylist)) {
  mylist2[[x]]<-dplyr::left_join(rownames_to_column(as.data.frame(mylist[[x]])),  
                          rownames_to_column(as.data.frame(VST[rownames(VST) %in%
                          rownames(as.data.frame(mylist[[x]])),])))

  mylist2 <- lapply(mylist2,na.omit)
  paste("Dim with NAs")
  paste(sapply(mylist2,dim))
  paste("Bacterial species with Basemean lower 1")
  paste(sapply(lapply(mylist2, function(y) {y[which(y$baseMean < 1),]}),dim))
  mylist2 <- lapply(mylist2, function(y) {y[which(y$padj < alpha),]})
  paste("Dim NA omit")
  paste(sapply(mylist2,dim))
  mylist2 <- lapply(mylist2, function(z){z[order(z$padj),]})
  mylist2 <- lapply(mylist2, function(x) {x$sign <-ifelse(sign(x$log2FoldChange) > 0, "enriched", "diminished");return(x)})
  }
  
  ddslist[[a+1]] <- mylist2
  names(ddslist)[[a+1]] <- paste("res", sub("dds", "\\1", g), sep="")},
  error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "SL_21"
## [1] "SLWF_21"
names(ddslist)
## [1] "ps_SL_21"    "dds_SL_21"   "vst_SL_21"   "ps_SLWF_21"  "dds_SLWF_21"
## [6] "vst_SLWF_21" "res_SL_21"   "res_SLWF_21"
paste("Differentially Abundant Taxa")
## [1] "Differentially Abundant Taxa"
sapply(ddslist$"res_SL_21" ,dim)[1,]
## SLSU21BB-SLSU21MG SLSU21BB-SLSU21ML SLSU21BB-SLSU21SS SLSU21MG-SLSU21ML 
##                18               195                70               229 
## SLSU21MG-SLSU21SS SLSU21ML-SLSU21SS 
##                98                68
sapply(ddslist$"res_SLWF_21" ,dim)[1,]
## SLSU21BB-SLSU21MG SLSU21BB-SLSU21ML SLSU21BB-SLSU21SS       SLSU21BB-WF 
##                24               320               206               181 
## SLSU21MG-SLSU21ML SLSU21MG-SLSU21SS       SLSU21MG-WF SLSU21ML-SLSU21SS 
##               263               143               216                51 
##       SLSU21ML-WF       SLSU21SS-WF 
##               294               258
###########
#Save Data#
###########
saveRDS(ddslist, file = paste0(file.path(path_Output_16S, paste(paste(save_name,"16S_DAT_Replicates_pairwise", Date, sep="_"), ".rds", sep=""))))

ddslist<-readRDS(file = paste0(file.path(path_Output_16S, paste(paste(save_name, "16S_DAT_Replicates_pairwise", Date, sep="_"), ".rds", sep=""))))

7.2 DAT visualization

7.2.1 PCA

##################
# SampleDist PCAs#
##################
for (i in names(ddslist)[grepl("vst", names(ddslist))]){
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de using code adapted from
        pcaExplorer, please cite:
      Marini, F., & Binder, H. (2019). pcaExplorer: an R/Bioconductor package for interacting with RNA-seq
      principal components. BMC bioinformatics, 20, 1-8.")
  require(plyr); require(ggrepel); require(cowplot)
  if (OperatingSystem == "Mac" ) {quartz() }
  tryCatch({
  g       <- paste(i) 
  gg      <- sub('....', '', g)
  A<-pcaplotRK3(ddslist[[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",ellipse = TRUE,     ellipse.prob = 0.5) +
  scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
  scale_color_manual(values=col.Palette$SL)+ atheme
  prow <- cowplot::plot_grid(A, labels = c(""), ncol = 1)
  title <- ggdraw() + draw_label_themeRK(paste(Species,  gg, Type), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRK(paste("vst-PCA [not dealing with compositionality]", "with",      length(rownames(ddslist[[i]])),"taxa [tax-glommed]",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,   
  vjust = 1)
  A<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0.04, 0.05, 0.98))
  plot(A)
  ggsave(A, filename = paste(paste(save_name, gg, Type, "clrPCA", sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8,
  height = 8)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."

## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."

###################
#Pairwise Barplots#
###################
for (i in names(ddslist)[grepl("res", names(ddslist))]){
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de")
  require(plyr); require(ggrepel); require(cowplot);  require(scales)
  tryCatch({
  g       <- paste(i) 
  gg      <- sub('....', '', g)
  res     <- ddslist[[i]]
  FILENAME    <- paste(paste(Species, gg, Type, "LCF0"), ".png", sep="")
  TITLE       <- paste(Species, gg, Type, "Differential abundant taxa", sep=" ")
  for (x in names(res)){ 
        tryCatch({
  res_tax_sig <- res[[x]]
  xx       <- paste(x)
  #res <- res[res$baseMean >= 10,] #Select Species with Basemean higher 10
  res_tax_sig<-res_tax_sig[order(res_tax_sig$baseMean, decreasing=T), ] #order for BaseMean
  res_tax_sig_head <-head(res_tax_sig, 20)
  rownames(res_tax_sig_head) <-res_tax_sig_head$rowname
  p <- ggplot(data=res_tax_sig_head, aes(x=fct_reorder(rownames(res_tax_sig_head), baseMean), 
                                    y= log2FoldChange, fill = baseMean)) +
  geom_bar(stat="identity") + coord_flip() +
  scale_fill_gradient(low = "white", high = "red", breaks = c(10, 100, 500, 1000), limits=c(10,500), oob=squish) +
  geom_text(aes(label = round(baseMean, 0)), color="black", size=2.5)+
  theme(axis.text.y = element_text(size = 8), strip.text.x = element_text(size = 7),
        legend.title = element_text( size=6), legend.text=element_text(size=6), 
        axis.title = element_text(size = 8)) +  xlab("") + ylab("log2 FoldChange") 
  prow <- cowplot::plot_grid(p, labels = c(""), ncol = 1)
  title <- cowplot::ggdraw() + draw_label_themeRK(paste(paste(TITLE, ":", sep=""), x, sep = "       "), 
                element = "plot.title",x = 0.05, hjust     = 0, vjust = 1)
        subtitle <- cowplot::ggdraw() + draw_label_themeRK(paste("top 20 of", table(sign(res_tax_sig$log2FoldChange) > 0)[2],"enriched,", table(sign(res_tax_sig$log2FoldChange) > 0)[1], "diminished species,", "[tax-glom on species level,", dim(otu_table(ddslist[[paste("ps_", noquote(gg), sep="")]]))[1],"taxa]", sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0, vjust = 1)
if (OperatingSystem == "Mac" ) { quartz() }
A<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0.05, 0.05, 0.98))
  plot(A)
  ggsave(A, filename = paste(paste(save_name, Type, "DESeq2-pairwise", xx, sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8,
  height = 8)},
        error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
  },error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

7.2.2 Aggregating DESeq2-Pairwise Comparisons

###########################################################
#Determine the Cluster-Numbers from the unclustered-Heatmap
rowclusternum        <- 1
columnclusternum     <- 1
for (i in names(ddslist)[grepl("dds", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de code adapted from https://github.com/kevinblighe/E-MTAB-6141")
  tryCatch({  
  min_count   <- 1
  a           <- length(ddslist)
  g           <- paste(i)
  gg          <- sub('....', '', g)
  FILENAME    <- paste(paste(Species, gg, Type, "LCF0"), ".png", sep="")
  TITLE       <- draw_label_themeRKwhite(paste(Species, gg, Type, "DAT"), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
  name2       <- ddslist[names(ddslist)[grepl(paste(gg), names(ddslist))]]
  vst         <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  ps          <-  name2[[names(name2)[grepl("ps", names(name2))]]]
  SAMDF       <- SAMDF16S[rownames(SAMDF16S) %in% rownames(sample_data(ps)),]
  res         <- name2[[names(name2)[grepl("res", names(name2))]]] #res
  res         <- lapply(res, function(z){z[z$baseMean >= min_count,]})
  
  resgenelist   <- list()
    for (ii in names(res)) {
      genes <- res[[ii]]$rowname
      resgenelist[[ii]] <- genes}
  resgenelist <- unique(as.vector(unlist(resgenelist)))

  
  A           <- BacteriaHeatPlotRK6(vst = vst, Species = Species, min_count = min_count, 
                          genes_of_interest = resgenelist, Samples = colnames(vst), SAMDF = SAMDF,  TITLE = TITLE)
  A 
   #Save heatmap to Environment
  #b           <- length(.GlobalEnv[[name]])
  #.GlobalEnv[[name]][[b+1]]        <- hmap
  #names(.GlobalEnv[[name]])[[b+1]] <- paste("hmap", sub("dds6", "\\1", g), sep="-")
  
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
## ERROR : object 'OutlineColor' not found 
## ERROR : object 'OutlineColor' not found
############################################################
#Cluster Heatmap with Extraction of Genes/Taxa from Cluster#
rowclusternum        <- 4
columnclusternum     <- 4
Cluster       <- list()
for (i in names(ddslist)[grepl("dds_SLWF_21", names(ddslist))]) {
  paste("Code Raphael Koll raphael.koll@uni-hamburg.de code adapted from https://github.com/kevinblighe/E-MTAB-6141")
  tryCatch({  
  min_mean_count   <- 10
  a           <- length(Cluster)
  g           <- paste(i)
  gg          <- sub('....', '', g)
  FILENAME    <- paste(paste(save_name, gg, Type, "LCF0", "MinMeanGroup", min_mean_count, sep="_"), ".png", sep="")
  TITLE       <- draw_label_themeRK(paste(Species, gg, Type, "DAT"), element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
  name2       <- ddslist[names(ddslist)[grepl(paste(gg), names(ddslist))]]
  vst         <- assay(name2[[names(name2)[grepl("vst", names(name2))]]])
  clr         <- as.data.frame(assay(pslist[[paste("TSEc.l.r", sub("dds", "", i), sep="")]]))
  ps          <- name2[[names(name2)[grepl("ps", names(name2))]]]
  SAMDF       <- SAMDF16S[rownames(SAMDF16S) %in% rownames(sample_data(ps)),]
  res         <- name2[[names(name2)[grepl("res", names(name2))]]] #res
  #res         <- lapply(res, function(z){z[z$baseMean >= min_count,]})
  ##############################################
  #Filter for min_mean_count per sampling group#
  ASVs_to_keep <- list()
  for (Replicate2 in unique(SAMDF$Replicate2)) {
    ASVs_to_keep_length <- length(ASVs_to_keep)
    df <- as.data.frame(otu_table(ps))
    df_order <- df[grepl(paste(Replicate2), names(df))]
    df_order[] <- sapply(df_order, as.numeric)
    df_order <- df_order[rowMeans(df_order) > min_mean_count,]
    df_order <- rownames(df_order)
  ASVs_to_keep[[ASVs_to_keep_length+1]] <- df_order
  names(ASVs_to_keep)[[ASVs_to_keep_length+1]] <- paste(Replicate2)}
  ASVs_to_keep <- unique(as.vector(unlist(ASVs_to_keep)))
  ########################
  #Extract Deseq2 Results#
  resgenelist   <- list()
    for (ii in names(res)) {
      genes <- res[[ii]]$rowname
      resgenelist[[ii]] <- genes}
  resgenelist <- unique(as.vector(unlist(resgenelist)))
  #####################################################
  #Plot Hamp of z-Score CLR filtered for MinMean_Count#
  resgenelist <- resgenelist[resgenelist %in% ASVs_to_keep]
    # hmap <-  BacteriaHeatPlotRK7(OmicsData = CLR, Species = Species, 
    #                     genes_of_interest = resgenelist, Samples = rownames(SAMDF), 
    #                     SAMDF = SAMDF,  TITLE = TITLE, filename= FILENAME)
  BacteriaHeatPlotRK7_withCore(OmicsData = clr, 
                               Species = Species, 
                               genes_of_interest = resgenelist, 
                               Samples = rownames(SAMDF), 
                               SAMDF = SAMDF,  
                               TITLE = TITLE, 
                               filename= FILENAME, 
                               OutlineColor="grey20")
  #################################
  #Extraxt Genes/Taxa from Heatmap#
    hmap        <- draw(hmap)
    clrow       <- row_order(hmap)
    genecluster <- lapply(names(clrow), function(x){
         out  <- data.frame(GeneID = rownames(heat[clrow[[x]],]),
                    clrow = paste0(x),stringsAsFactors = FALSE)
            return(out)}) %>%   
         do.call(rbind, .)
  ######################
  #Save Cluster Results#
  Cluster1    <- genecluster[genecluster$clrow %in% c("Cluster 1"),]
  Cluster2    <- genecluster[genecluster$clrow %in% c("Cluster 2"),]
  Cluster3    <- genecluster[genecluster$clrow %in% c("Cluster 3"),]
  Cluster4    <- genecluster[genecluster$clrow %in% c("Cluster 4"),]
  Cluster5    <- genecluster[genecluster$clrow %in% c("Cluster 5"),]
  Cluster6    <- genecluster[genecluster$clrow %in% c("Cluster 6"),]
  Cluster1    <- heat[rownames(heat) %in% Cluster1$GeneID,]
  Cluster2    <- heat[rownames(heat) %in% Cluster2$GeneID,]
  Cluster3    <- heat[rownames(heat) %in% Cluster3$GeneID,]
  Cluster4    <- heat[rownames(heat) %in% Cluster4$GeneID,]
  Cluster5    <- heat[rownames(heat) %in% Cluster5$GeneID,]
  Cluster6    <- heat[rownames(heat) %in% Cluster6$GeneID,]
  Cluster[[a+1]] <- as.data.frame(Cluster1)
  Cluster[[a+2]] <- as.data.frame(Cluster2)
  Cluster[[a+3]] <- as.data.frame(Cluster3)
  Cluster[[a+4]] <- as.data.frame(Cluster4)
  Cluster[[a+5]] <- as.data.frame(Cluster5)
  Cluster[[a+6]] <- as.data.frame(Cluster6)
  names(Cluster)[[a+1]] <- paste("Cluster1-", sub("dds", "\\1", g), sep="")
  names(Cluster)[[a+2]] <- paste("Cluster2-", sub("dds", "\\1", g), sep="")
  names(Cluster)[[a+3]] <- paste("Cluster3-", sub("dds", "\\1", g), sep="")
  names(Cluster)[[a+4]] <- paste("Cluster4-", sub("dds", "\\1", g), sep="")
  names(Cluster)[[a+5]] <- paste("Cluster5-", sub("dds", "\\1", g), sep="")
  names(Cluster)[[a+6]] <- paste("Cluster6-", sub("dds", "\\1", g), sep="")
  
  Cluster[[rowclusternum+1]] <- hmap
  names(Cluster)[[rowclusternum+1]] <- paste("hmap", sub("dds", "\\1", g), sep="")
  
  Cluster[[rowclusternum+2]] <- HeatPlot_prow
  names(Cluster)[[rowclusternum+2]] <- paste("HeatPlot_prow", sub("dds", "\\1", g), sep="")
  },
  error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}

print(sapply(Cluster,dim))
## $`Cluster1-_SLWF_21`
## [1] 113  28
## 
## $`Cluster2-_SLWF_21`
## [1] 93 28
## 
## $`Cluster3-_SLWF_21`
## [1] 91 28
## 
## $`Cluster4-_SLWF_21`
## [1] 71 28
## 
## $hmap_SLWF_21
## NULL
## 
## $HeatPlot_prow_SLWF_21
## NULL
###########
#Save Data#
###########
saveRDS(Cluster, file = paste0(file.path(path_Output_16S,paste(paste(save_name, "16S_DAT_Replicates_pw4_Cluster", Date, sep="_"), ".rds", sep=""))))
        
Cluster <-readRDS(file = paste0(file.path(path_Output_16S,paste(paste(save_name, "16S_DAT_Replicates_pw4_Cluster", Date, sep="_"), ".rds", sep=""))))


#Export Data for Cluster
###########################################
#SSU_Data created in 4.2.4 Create SSU_Data#
###########################################
 Deseq2Cluster_Taxonomy_list <- list()
  for (Comparison in names(Cluster[grepl("Cluster", names(Cluster))])) {
      Deseq2Cluster_Taxonomy_list_length <- length( Deseq2Cluster_Taxonomy_list)
      df <- Cluster[[paste(Comparison)]]
      df <- SSU_Data[SSU_Data$ASV %in% rownames(df),]
      
      df <- df %>% relocate(ASV, .after=SLSU21MLEB9rel)
      rownames(df) <- df$ASV
    
      ASVs_to_keep <- list()
        for (Replicate2 in unique(SAMDF16S$Replicate2)) {
          ASVs_to_keep_length <- length(ASVs_to_keep)
          df_ASV <- as.data.frame(otu_table(ps))
          df_ASV_order   <- df_ASV [grepl(paste(Replicate2), names(df_ASV))]
          df_ASV_order[] <- sapply(df_ASV_order, as.numeric)
          #Subsetting for the actual Cluster#
          df_ASV_order   <- df_ASV_order[rownames(df_ASV_order) %in% rownames(df),]
          df_ASV_rel     <- as.data.frame(t(otu_table(pslist$ps_SLWF_21 %>%
            transform_sample_counts(function(x) {x/sum(x)*100}))))
          
          df_ASV_order_rel <- left_join(df_ASV_order %>%
                                  mutate(ASV = rownames(df_ASV_order)) %>%
                                  select("ASV"), df_ASV_rel[names(df_ASV_rel) %in%
                                  names(df_ASV_order)] %>% mutate(ASV = rownames(df_ASV_rel)))
          
          df_ASV_order_rel_mean <- df_ASV_order_rel %>%
            dplyr::select(-ASV) %>%
            dplyr::summarise(across(everything(), mean, na.rm = TRUE))  %>% rowMeans(.)
      ASVs_to_keep[[ASVs_to_keep_length+1]] <- df_ASV_order_rel_mean
      names(ASVs_to_keep)[[ASVs_to_keep_length+1]] <- paste(Replicate2)}
      
      max_name <- names(ASVs_to_keep)[which.max(unlist(ASVs_to_keep))]
      
      df_SSU_order <- df[grepl(paste(max_name), names(df)) & grepl("rel", names(df))]
      df_SSU_order[] <- sapply(df_SSU_order, as.numeric)
      df_SSU_order<- as.data.frame(sort(rowMeans(df_SSU_order), decreasing = T)) 
      names(df_SSU_order) <-paste("Mean", max_name, sep="_")
      df_SSU_order$ASV <- rownames(df_SSU_order)
      
      df <- left_join(df_SSU_order, df)

       Deseq2Cluster_Taxonomy_list[[ Deseq2Cluster_Taxonomy_list_length+1]] <- df
      names( Deseq2Cluster_Taxonomy_list)[[ Deseq2Cluster_Taxonomy_list_length+1]] <- Comparison
    write.csv2(df, file.path(path_Output_16S,paste(paste(save_name,  "16S_DAT_Replicates",paste("Deseq2_",Comparison, sep=""), Date, sep="_"), ".csv", sep="")))

}

#-

8 Summary Plots

8.1 Venn Diagramms of Output

#VennDiagramm of Fish and Watersamples
Venn_list_ps <- list(
"SL" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("SL", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("SL", sample_names(pslist$ps_SLWF_21))])))) > 1], 
"WF" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("WF", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("WF", sample_names(pslist$ps_SLWF_21))])))) > 1]
)

Venn_plot <- ggVennDiagram::ggVennDiagram(Venn_list_ps, set_color = c("steelblue","darkblue"), edge_size=1, set_size = 5) + 
    scale_color_manual(values = c("steelblue","darkblue")) + theme(legend.position = "none")
Venn_plot

#VennDiagramm of Fish from sampling locations#
Venn_list_Replicates <-  list(
"MG-713" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("MG", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("MG", sample_names(pslist$ps_SLWF_21))])))) > 1], 
"BB-692" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("BB", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("BB", sample_names(pslist$ps_SLWF_21))])))) > 1], 
"SS-665" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("SS", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("SS", sample_names(pslist$ps_SLWF_21))])))) > 1], 
"ML-633" = names(rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("ML", sample_names(pslist$ps_SLWF_21))])))) > 1)[rowSums(t(otu_table(subset_samples(pslist$ps_SLWF_21, SampleID %in% sample_names(pslist$ps_SLWF_21)[grepl("ML", sample_names(pslist$ps_SLWF_21))])))) > 1]
)
Venn_plot_Reps <- ggVennDiagram::ggVennDiagram(Venn_list_Replicates, set_color =  c("steelblue", "#9E9E9E", "#FFA726", "#FF7043"), edge_size=1, set_size = 5) + 
    scale_color_manual(values =  c("steelblue", "#9E9E9E", "#FFA726", "#FF7043")) + theme(legend.position = "none")+
  scale_x_continuous(expand = expansion(mult = .2))
Venn_plot_Reps 

8.2 Publication Plots

##############
#Summary Plot#
##############
cowplot::plot_grid(Species_Accumulation_raw, Species_Accumulation_Filtered, labels = c("A", "B"), ncol = 1, rel_heights = c(0.8,1)) -> part_1
cowplot::plot_grid(part_1, SampleDist_PCA, labels = c("", "C"), ncol = 2) -> part_2
cowplot::plot_grid(part_2, Alpha_Diversity_BarPlot, labels = c("", "D"), ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name, "SSU_SpecAb_PCA_AlphaBarPlot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 9)
part_3

#####################
#BacteriaHeat & Venn#
#####################
cowplot::plot_grid(Venn_plot, Venn_plot_Reps, labels = c("A", "B"), rel_heights = c(0.8,1), rel_widths = c(0.8,1),ncol = 2) -> part_1

ggsave(Venn_plot, filename = paste(paste(save_name, Type, "Venn_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 3,height =2 )

ggsave(Venn_plot_Reps, filename = paste(paste(save_name, Type, "Venn_Reps_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 4,height =4)

ggsave(Cluster$HeatPlot_prow_SLWF_21, filename = paste(paste(save_name, Type, "Heatmap_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,height =5)
#####################
#BacteriaHeat & Venn# Deseq2
#####################
cowplot::plot_grid(Venn_plot, Venn_plot_Reps, labels = c("A", "B"), rel_heights = c(0.8,1), rel_widths = c(0.8,1),ncol = 2) -> part_1
cowplot::plot_grid(part_1, Cluster$HeatPlot_prow, labels = c("", "C"), ncol = 1, rel_heights = c(0.5,1) ) -> part_2
ggsave(part_2, filename = paste(paste(save_name, Type, "Venn_Deseq2_Heat", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10,
height = 9)
part_2

8.3 Supplementary Plots

###########
#S1 Figure#
###########
########################################
#Species_Accumulation & Core Microbiome#
########################################
ggsave(Species_Accumulation_raw, 
       filename = paste(paste(save_name, Type, "Species_Accumulation_raw_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png',
       dpi=300, width = 6, height = 4)
ggsave(Species_Accumulation_Filtered, 
       filename = paste(paste(save_name, Type, "Species_Accumulation_Filtered_plot", Date, sep="_"), ".png", sep=""), path = pathPlots ,
       device='png', dpi=300, width = 6, height = 4)
ggsave(Core_Alpha_Diversity_BarPlot, 
       filename = paste(paste(save_name, Type, "Core_Alpha_Diversity_BarPlot_plot", Date, sep="_"), ".png", sep=""), path = pathPlots ,
       device='png', dpi=300, width = 8, height = 6)
ggsave(RelCoreAbundance_normalized_barplot, 
       filename = paste(paste(save_name, Type, "RelCoreAbundance_normalized_barplot_plot", Date, sep="_"), ".png", sep=""), path = pathPlots ,
       device='png', dpi=300, width = 5, height = 6)




cowplot::plot_grid(Species_Accumulation_raw, Species_Accumulation_Filtered, labels = c("A", "B"), ncol = 2) -> part_1
cowplot::plot_grid(Core_Alpha_Diversity_BarPlot, RelCoreAbundance_normalized_barplot, labels = c("C", "D"), ncol = 2, rel_widths = c(1,0.5)) -> Core_Microbiome_plot
cowplot::plot_grid(part_1, Core_Microbiome_plot, labels = c("", ""), ncol = 1, rel_widths = c(0.5,1)) -> part_3
ggsave(part_3, filename = paste(paste(save_name, Type, "RelCoreAbundance_Core_Microbiome_plot", Date, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 9)
part_3

-

9 Final dataset

saveRDS(pslist, file.path(path_Output_16S,paste(paste(save_name,  "16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))
pslist <- readRDS(file.path(path_Output_16S,paste(paste(save_name,  "16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))